Diffstat (limited to 'arch/x86/kernel/smpboot.c'):
 arch/x86/kernel/smpboot.c | 176
 1 file changed, 88 insertions(+), 88 deletions(-)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 42a2dca984b3..2d5200e56357 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -102,6 +102,8 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
 DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
 EXPORT_PER_CPU_SYMBOL(cpu_info);
 
+static DEFINE_PER_CPU(struct completion, die_complete);
+
 atomic_t init_deasserted;
 
 /*
@@ -111,7 +113,6 @@ atomic_t init_deasserted;
 static void smp_callin(void)
 {
 	int cpuid, phys_id;
-	unsigned long timeout;
 
 	/*
 	 * If waken up by an INIT in an 82489DX configuration
@@ -130,37 +131,6 @@ static void smp_callin(void)
 	 * (This works even if the APIC is not enabled.)
 	 */
 	phys_id = read_apic_id();
-	if (cpumask_test_cpu(cpuid, cpu_callin_mask)) {
-		panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
-					phys_id, cpuid);
-	}
-	pr_debug("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
-
-	/*
-	 * STARTUP IPIs are fragile beasts as they might sometimes
-	 * trigger some glue motherboard logic. Complete APIC bus
-	 * silence for 1 second, this overestimates the time the
-	 * boot CPU is spending to send the up to 2 STARTUP IPIs
-	 * by a factor of two. This should be enough.
-	 */
-
-	/*
-	 * Waiting 2s total for startup (udelay is not yet working)
-	 */
-	timeout = jiffies + 2*HZ;
-	while (time_before(jiffies, timeout)) {
-		/*
-		 * Has the boot CPU finished it's STARTUP sequence?
-		 */
-		if (cpumask_test_cpu(cpuid, cpu_callout_mask))
-			break;
-		cpu_relax();
-	}
-
-	if (!time_before(jiffies, timeout)) {
-		panic("%s: CPU%d started up but did not get a callout!\n",
-		      __func__, cpuid);
-	}
 
 	/*
 	 * the boot CPU has finished the init stage and is spinning
@@ -296,11 +266,19 @@ void smp_store_cpu_info(int id)
 }
 
 static bool
+topology_same_node(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+	int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
+
+	return (cpu_to_node(cpu1) == cpu_to_node(cpu2));
+}
+
+static bool
 topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name)
 {
 	int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
 
-	return !WARN_ONCE(cpu_to_node(cpu1) != cpu_to_node(cpu2),
+	return !WARN_ONCE(!topology_same_node(c, o),
 		"sched: CPU #%d's %s-sibling CPU #%d is not on the same node! "
 		"[node: %d != %d]. Ignoring dependency.\n",
 		cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2));
@@ -341,17 +319,44 @@ static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
 	return false;
 }
 
-static bool match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+/*
+ * Unlike the other levels, we do not enforce keeping a
+ * multicore group inside a NUMA node. If this happens, we will
+ * discard the MC level of the topology later.
+ */
+static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
 {
-	if (c->phys_proc_id == o->phys_proc_id) {
-		if (cpu_has(c, X86_FEATURE_AMD_DCM))
-			return true;
-
-		return topology_sane(c, o, "mc");
-	}
+	if (c->phys_proc_id == o->phys_proc_id)
+		return true;
 	return false;
 }
 
+static struct sched_domain_topology_level numa_inside_package_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+#ifdef CONFIG_SCHED_MC
+	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+#endif
+	{ NULL, },
+};
+/*
+ * set_sched_topology() sets the topology internal to a CPU. The
+ * NUMA topologies are layered on top of it to build the full
+ * system topology.
+ *
+ * If NUMA nodes are observed to occur within a CPU package, this
+ * function should be called. It forces the sched domain code to
+ * only use the SMT level for the CPU portion of the topology.
+ * This essentially falls back to relying on NUMA information
+ * from the SRAT table to describe the entire system topology
+ * (except for hyperthreads).
+ */
+static void primarily_use_numa_for_topology(void)
+{
+	set_sched_topology(numa_inside_package_topology);
+}
+
 void set_cpu_sibling_map(int cpu)
 {
 	bool has_smt = smp_num_siblings > 1;
@@ -388,7 +393,7 @@ void set_cpu_sibling_map(int cpu)
 	for_each_cpu(i, cpu_sibling_setup_mask) {
 		o = &cpu_data(i);
 
-		if ((i == cpu) || (has_mp && match_mc(c, o))) {
+		if ((i == cpu) || (has_mp && match_die(c, o))) {
 			link_mask(core, cpu, i);
 
 			/*
@@ -410,6 +415,8 @@ void set_cpu_sibling_map(int cpu)
 			} else if (i != cpu && !c->booted_cores)
 				c->booted_cores = cpu_data(i).booted_cores;
 		}
+		if (match_die(c, o) && !topology_same_node(c, o))
+			primarily_use_numa_for_topology();
 	}
 }
 
@@ -753,8 +760,8 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 	unsigned long start_ip = real_mode_header->trampoline_start;
 
 	unsigned long boot_error = 0;
-	int timeout;
 	int cpu0_nmi_registered = 0;
+	unsigned long timeout;
 
 	/* Just in case we booted with a single CPU. */
 	alternatives_enable_smp();
@@ -802,6 +809,15 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 	}
 
 	/*
+	 * AP might wait on cpu_callout_mask in cpu_init() with
+	 * cpu_initialized_mask set if previous attempt to online
+	 * it timed-out. Clear cpu_initialized_mask so that after
+	 * INIT/SIPI it could start with a clean state.
+	 */
+	cpumask_clear_cpu(cpu, cpu_initialized_mask);
+	smp_mb();
+
+	/*
 	 * Wake up a CPU in difference cases:
 	 * - Use the method in the APIC driver if it's defined
 	 * Otherwise,
@@ -815,53 +831,38 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 
 	if (!boot_error) {
 		/*
-		 * allow APs to start initializing.
+		 * Wait 10s total for a response from AP
 		 */
-		pr_debug("Before Callout %d\n", cpu);
-		cpumask_set_cpu(cpu, cpu_callout_mask);
-		pr_debug("After Callout %d\n", cpu);
+		boot_error = -1;
+		timeout = jiffies + 10*HZ;
+		while (time_before(jiffies, timeout)) {
+			if (cpumask_test_cpu(cpu, cpu_initialized_mask)) {
+				/*
+				 * Tell AP to proceed with initialization
+				 */
+				cpumask_set_cpu(cpu, cpu_callout_mask);
+				boot_error = 0;
+				break;
+			}
+			udelay(100);
+			schedule();
+		}
+	}
 
+	if (!boot_error) {
 		/*
-		 * Wait 5s total for a response
+		 * Wait till AP completes initial initialization
 		 */
-		for (timeout = 0; timeout < 50000; timeout++) {
-			if (cpumask_test_cpu(cpu, cpu_callin_mask))
-				break;	/* It has booted */
-			udelay(100);
+		while (!cpumask_test_cpu(cpu, cpu_callin_mask)) {
 			/*
 			 * Allow other tasks to run while we wait for the
 			 * AP to come online. This also gives a chance
 			 * for the MTRR work(triggered by the AP coming online)
 			 * to be completed in the stop machine context.
 			 */
+			udelay(100);
 			schedule();
 		}
-
-		if (cpumask_test_cpu(cpu, cpu_callin_mask)) {
-			print_cpu_msr(&cpu_data(cpu));
-			pr_debug("CPU%d: has booted.\n", cpu);
-		} else {
-			boot_error = 1;
-			if (*trampoline_status == 0xA5A5A5A5)
-				/* trampoline started but...? */
-				pr_err("CPU%d: Stuck ??\n", cpu);
-			else
-				/* trampoline code not run */
-				pr_err("CPU%d: Not responding\n", cpu);
-			if (apic->inquire_remote_apic)
-				apic->inquire_remote_apic(apicid);
-		}
-	}
-
-	if (boot_error) {
-		/* Try to put things back the way they were before ... */
-		numa_remove_cpu(cpu); /* was set by numa_add_cpu */
-
-		/* was set by do_boot_cpu() */
-		cpumask_clear_cpu(cpu, cpu_callout_mask);
-
-		/* was set by cpu_init() */
-		cpumask_clear_cpu(cpu, cpu_initialized_mask);
 	}
 
 	/* mark "stuck" area as not stuck */
@@ -1326,26 +1327,24 @@ int native_cpu_disable(void)
 		return ret;
 
 	clear_local_APIC();
-
+	init_completion(&per_cpu(die_complete, smp_processor_id()));
 	cpu_disable_common();
+
 	return 0;
 }
 
 void native_cpu_die(unsigned int cpu)
 {
 	/* We don't do anything here: idle task is faking death itself. */
-	unsigned int i;
+	wait_for_completion_timeout(&per_cpu(die_complete, cpu), HZ);
 
-	for (i = 0; i < 10; i++) {
-		/* They ack this in play_dead by setting CPU_DEAD */
-		if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
-			if (system_state == SYSTEM_RUNNING)
-				pr_info("CPU %u is now offline\n", cpu);
-			return;
-		}
-		msleep(100);
+	/* They ack this in play_dead() by setting CPU_DEAD */
+	if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
+		if (system_state == SYSTEM_RUNNING)
+			pr_info("CPU %u is now offline\n", cpu);
+	} else {
+		pr_err("CPU %u didn't die...\n", cpu);
 	}
-	pr_err("CPU %u didn't die...\n", cpu);
 }
 
 void play_dead_common(void)
@@ -1357,6 +1356,7 @@ void play_dead_common(void)
 	mb();
 	/* Ack it */
 	__this_cpu_write(cpu_state, CPU_DEAD);
+	complete(&per_cpu(die_complete, smp_processor_id()));
 
 	/*
 	 * With physical CPU hotplug, we should halt the cpu