 Documentation/cpu-hotplug.txt       |  24
 Documentation/kernel-parameters.txt |  14
 arch/x86/Kconfig                    |  44
 arch/x86/include/asm/cpu.h          |   4
 arch/x86/include/asm/smp.h          |   1
 arch/x86/kernel/apic/io_apic.c      |   4
 arch/x86/kernel/cpu/common.c        |   5
 arch/x86/kernel/cpu/mtrr/main.c     |   9
 arch/x86/kernel/head_32.S           |  13
 arch/x86/kernel/head_64.S           |  16
 arch/x86/kernel/i387.c              |   6
 arch/x86/kernel/smpboot.c           | 149
 arch/x86/kernel/topology.c          | 101
 arch/x86/power/cpu.c                |  82
 kernel/cpu.c                        |   5
 15 files changed, 436 insertions(+), 41 deletions(-)
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
index 66ef8f35613d..9f401350f502 100644
--- a/Documentation/cpu-hotplug.txt
+++ b/Documentation/cpu-hotplug.txt
@@ -207,6 +207,30 @@ by making it not-removable.
 
 In such cases you will also notice that the online file is missing under cpu0.
 
+Q: Is CPU0 removable on X86?
+A: Yes. If the kernel is compiled with CONFIG_BOOTPARAM_HOTPLUG_CPU0=y, CPU0 is
+removable by default. Otherwise, CPU0 can also be made removable with the
+kernel parameter cpu0_hotplug.
+
+Some features depend on CPU0. Two known dependencies are:
+
+1. Resume from hibernate/suspend depends on CPU0. Hibernate/suspend will fail
+if CPU0 is offline, and you need to online CPU0 before hibernate/suspend can
+continue.
+2. PIC interrupts also depend on CPU0. CPU0 can't be removed if a PIC
+interrupt is detected.
+
+Poweroff/reboot is said to depend on CPU0 on some machines, although no such
+failure has been observed so far after offlining CPU0 on the machines tested.
+
+Please report any other CPU0 dependencies you know of or run into.
+
+If the dependencies are under your control, you can turn on the CPU0 hotplug
+feature either with CONFIG_BOOTPARAM_HOTPLUG_CPU0 or with the kernel parameter
+cpu0_hotplug.
+
+--Fenghua Yu <fenghua.yu@intel.com>
+
 Q: How do i find out if a particular CPU is not removable?
 A: Depending on the implementation, some architectures may show this by the
 absence of the "online" file. This is done if it can be determined ahead of
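The Q&A above can be exercised directly from userspace once the feature is enabled. Here is a minimal sketch (not part of the patch) that offlines and then re-onlines CPU0 through the standard sysfs control file; error handling is deliberately thin:

    /* Minimal sketch: toggle CPU0 via sysfs (assumes cpu0_hotplug is on). */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    static int cpu0_set_online(int online)
    {
            const char *path = "/sys/devices/system/cpu/cpu0/online";
            int fd = open(path, O_WRONLY);

            if (fd < 0) {
                    perror(path);   /* no file => CPU0 is not hotpluggable */
                    return -1;
            }
            if (write(fd, online ? "1" : "0", 1) != 1) {
                    perror("write");
                    close(fd);
                    return -1;
            }
            close(fd);
            return 0;
    }

    int main(void)
    {
            if (cpu0_set_online(0) == 0)
                    printf("CPU0 offlined\n");
            if (cpu0_set_online(1) == 0)
                    printf("CPU0 onlined\n");
            return 0;
    }

If CONFIG_BOOTPARAM_HOTPLUG_CPU0 is off and cpu0_hotplug was not passed, the open() fails because the online control file is never created for CPU0.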
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 03d1251a915e..5190f1706414 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1984,6 +1984,20 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	nox2apic	[X86-64,APIC] Do not enable x2APIC mode.
 
+	cpu0_hotplug	[X86] Turn on the CPU0 hotplug feature when
+			CONFIG_BOOTPARAM_HOTPLUG_CPU0 is off.
+			Some features depend on CPU0. Known dependencies are:
+			1. Resume from suspend/hibernate depends on CPU0.
+			Suspend/hibernate will fail if CPU0 is offline; you
+			need to online CPU0 before suspend/hibernate.
+			2. PIC interrupts also depend on CPU0. CPU0 can't be
+			removed if a PIC interrupt is detected.
+			Poweroff/reboot is said to depend on CPU0 on some
+			machines, although no such issue has been observed so
+			far after offlining CPU0 on the machines tested.
+			If the dependencies are under your control, you can
+			turn on cpu0_hotplug.
+
 	nptcg=		[IA-64] Override max number of concurrent global TLB
 			purges which is reported from either PAL_VM_SUMMARY or
 			SAL PALO.
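For illustration only, a bootloader entry enabling the feature on a kernel built without CONFIG_BOOTPARAM_HOTPLUG_CPU0 might look like the following (kernel image and root device are placeholders):

    linux /boot/vmlinuz root=/dev/sda1 ro cpu0_hotplug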
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6c304438b503..2d643255c40d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1698,6 +1698,50 @@ config HOTPLUG_CPU
 	  automatically on SMP systems. )
 	  Say N if you want to disable CPU hotplug.
 
+config BOOTPARAM_HOTPLUG_CPU0
+	bool "Set default setting of cpu0_hotpluggable"
+	default n
+	depends on HOTPLUG_CPU && EXPERIMENTAL
+	---help---
+	  Set whether the default state of cpu0_hotpluggable is on or off.
+
+	  Say Y here to enable CPU0 hotplug by default. If this switch
+	  is turned on, there is no need to give the cpu0_hotplug kernel
+	  parameter; the CPU0 hotplug feature is enabled by default.
+
+	  Please note: there are two known CPU0 dependencies if you want
+	  to enable the CPU0 hotplug feature either by this switch or by
+	  the cpu0_hotplug kernel parameter.
+
+	  First, resume from hibernate or suspend always starts from CPU0.
+	  So hibernate and suspend are prevented if CPU0 is offline.
+
+	  Second, PIC interrupts always go to CPU0. CPU0 cannot be
+	  offlined if any interrupt cannot be migrated away from it.
+	  There may be other CPU0 dependencies.
+
+	  Please make sure the dependencies are under your control before
+	  you enable this feature.
+
+	  Say N if you don't want to enable the CPU0 hotplug feature by
+	  default. You can still enable the CPU0 hotplug feature at boot
+	  with the kernel parameter cpu0_hotplug.
+
+config DEBUG_HOTPLUG_CPU0
+	def_bool n
+	prompt "Debug CPU0 hotplug"
+	depends on HOTPLUG_CPU && EXPERIMENTAL
+	---help---
+	  Enabling this option offlines CPU0 (if CPU0 can be offlined) as
+	  soon as possible and boots up userspace with CPU0 offline. Users
+	  can online CPU0 again after boot.
+
+	  To debug CPU0 hotplug, you need to enable the CPU0 offline/online
+	  feature either by turning on CONFIG_BOOTPARAM_HOTPLUG_CPU0 during
+	  compilation or by giving the cpu0_hotplug kernel parameter at boot.
+
+	  If unsure, say N.
+
 config COMPAT_VDSO
 	def_bool y
 	prompt "Compat VDSO support"
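For reference, a .config fragment exercising both new options (a sketch; CONFIG_EXPERIMENTAL must still exist in the tree for these prompts to be visible) would be:

    CONFIG_HOTPLUG_CPU=y
    CONFIG_BOOTPARAM_HOTPLUG_CPU0=y
    CONFIG_DEBUG_HOTPLUG_CPU0=y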
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 4564c8e28a33..5f9a1243190e 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -28,6 +28,10 @@ struct x86_cpu {
 #ifdef CONFIG_HOTPLUG_CPU
 extern int arch_register_cpu(int num);
 extern void arch_unregister_cpu(int);
+extern void __cpuinit start_cpu0(void);
+#ifdef CONFIG_DEBUG_HOTPLUG_CPU0
+extern int _debug_hotplug_cpu(int cpu, int action);
+#endif
 #endif
 
 DECLARE_PER_CPU(int, cpu_state);
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 4f19a1526037..b073aaea747c 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -166,6 +166,7 @@ void native_send_call_func_ipi(const struct cpumask *mask);
 void native_send_call_func_single_ipi(int cpu);
 void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle);
 
+void smp_store_boot_cpu_info(void);
 void smp_store_cpu_info(int id);
 #define cpu_physical_id(cpu)	per_cpu(x86_cpu_to_apicid, cpu)
 
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 1817fa911024..f78fc2b4deb0 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2199,9 +2199,11 @@ static int ioapic_retrigger_irq(struct irq_data *data)
 {
 	struct irq_cfg *cfg = data->chip_data;
 	unsigned long flags;
+	int cpu;
 
 	raw_spin_lock_irqsave(&vector_lock, flags);
-	apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
+	cpu = cpumask_first_and(cfg->domain, cpu_online_mask);
+	apic->send_IPI_mask(cpumask_of(cpu), cfg->vector);
 	raw_spin_unlock_irqrestore(&vector_lock, flags);
 
 	return 1;
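Worth spelling out why this two-line change is needed: once a CPU in cfg->domain can be offline, cpumask_first(cfg->domain) may return a dead CPU and the retrigger IPI would target nothing. A self-contained model of the corrected selection, with the cpumasks reduced to plain 64-bit words (illustrative names, not kernel API):

    /* Standalone model of cpumask_first_and(): masks as 64-bit words. */
    #include <stdint.h>
    #include <stdio.h>

    /* Index of the lowest set bit in (domain & online), or -1 if none. */
    static int first_and(uint64_t domain, uint64_t online)
    {
            uint64_t both = domain & online;

            return both ? __builtin_ctzll(both) : -1;
    }

    int main(void)
    {
            uint64_t domain = 0x3;  /* IRQ can be handled by CPU0 or CPU1 */
            uint64_t online = 0x2;  /* CPU0 offline, CPU1 online */

            /* Old code: first CPU of the domain, even if it is offline. */
            printf("first(domain)         -> CPU%d\n",
                   (int)__builtin_ctzll(domain));
            /* New code: first CPU both in the domain and online. */
            printf("first_and(.., online) -> CPU%d\n",
                   first_and(domain, online));
            return 0;
    }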
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7505f7b13e71..ca165ac6793b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1237,7 +1237,7 @@ void __cpuinit cpu_init(void)
 	oist = &per_cpu(orig_ist, cpu);
 
 #ifdef CONFIG_NUMA
-	if (cpu != 0 && this_cpu_read(numa_node) == 0 &&
+	if (this_cpu_read(numa_node) == 0 &&
 	    early_cpu_to_node(cpu) != NUMA_NO_NODE)
 		set_numa_node(early_cpu_to_node(cpu));
 #endif
@@ -1269,8 +1269,7 @@ void __cpuinit cpu_init(void)
 	barrier();
 
 	x86_configure_nx();
-	if (cpu != 0)
-		enable_x2apic();
+	enable_x2apic();
 
 	/*
 	 * set up and load the per-CPU TSS
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 6b96110bb0c3..e4c1a4184531 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -695,11 +695,16 @@ void mtrr_ap_init(void)
 }
 
 /**
- * Save current fixed-range MTRR state of the BSP
+ * Save the current fixed-range MTRR state of the first cpu in cpu_online_mask.
  */
 void mtrr_save_state(void)
 {
-	smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
+	int first_cpu;
+
+	get_online_cpus();
+	first_cpu = cpumask_first(cpu_online_mask);
+	smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
+	put_online_cpus();
 }
 
 void set_mtrr_aps_delayed_init(void)
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 4dac2f68ed4a..8e7f6556028f 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -266,6 +266,19 @@ num_subarch_entries = (. - subarch_entries) / 4
 	jmp default_entry
 #endif /* CONFIG_PARAVIRT */
 
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Boot CPU0 entry point. It's called from play_dead(). Everything has been
+ * set up already except the stack. We just set up the stack here and then
+ * jump to start_secondary().
+ */
+ENTRY(start_cpu0)
+	movl stack_start, %ecx
+	movl %ecx, %esp
+	jmp *(initial_code)
+ENDPROC(start_cpu0)
+#endif
+
 /*
  * Non-boot CPU entry point; entered from trampoline.S
  * We can't lgdt here, because lgdt itself uses a data segment, but
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 94bf9cc2c7ee..980053c4b9cc 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -252,6 +252,22 @@ ENTRY(secondary_startup_64)
 	pushq	%rax		# target address in negative space
 	lretq
 
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Boot CPU0 entry point. It's called from play_dead(). Everything has been
+ * set up already except the stack. We just set up the stack here and then
+ * jump to start_secondary().
+ */
+ENTRY(start_cpu0)
+	movq	stack_start(%rip), %rsp
+	movq	initial_code(%rip), %rax
+	pushq	$0		# fake return address to stop unwinder
+	pushq	$__KERNEL_CS	# set correct cs
+	pushq	%rax		# target address in negative space
+	lretq
+ENDPROC(start_cpu0)
+#endif
+
 	/* SMP bootup changes these two */
 	__REFDATA
 	.align	8
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 675a05012449..245a71db401a 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -175,7 +175,11 @@ void __cpuinit fpu_init(void)
 		cr0 |= X86_CR0_EM;
 	write_cr0(cr0);
 
-	if (!smp_processor_id())
+	/*
+	 * init_thread_xstate() is called only once, to avoid overriding
+	 * xstate_size during boot time or during CPU hotplug.
+	 */
+	if (xstate_size == 0)
 		init_thread_xstate();
 
 	mxcsr_feature_mask_init();
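The i387.c change swaps an "am I CPU0?" test for an "is the state already initialized?" test, so that a re-onlined CPU0 does not redo (and a hotplugged AP does not skip) the one-time setup. The pattern in isolation, with hypothetical names standing in for the real probe:

    /* Init-once guard keyed on the state itself, not on the CPU id. */
    #include <stdio.h>
    #include <stddef.h>

    static size_t xstate_size;      /* 0 means "not initialized yet" */

    static size_t compute_xstate_size(void)
    {
            return 512;             /* placeholder for the real CPUID probe */
    }

    static void fpu_init_once(void)
    {
            /* Safe to call from every CPU, including a re-onlined CPU0. */
            if (xstate_size == 0)
                    xstate_size = compute_xstate_size();
    }

    int main(void)
    {
            fpu_init_once();        /* boot CPU */
            fpu_init_once();        /* hotplugged CPU: no override */
            printf("xstate_size = %zu\n", xstate_size);
            return 0;
    }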
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f3e2ec878b8c..c635663b20d9 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -127,8 +127,8 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
 atomic_t init_deasserted;
 
 /*
- * Report back to the Boot Processor.
- * Running on AP.
+ * Report back to the Boot Processor during boot time or to the caller
+ * processor during CPU online.
 */
 static void __cpuinit smp_callin(void)
 {
@@ -140,15 +140,17 @@ static void __cpuinit smp_callin(void)
 	 * we may get here before an INIT-deassert IPI reaches
 	 * our local APIC.  We have to wait for the IPI or we'll
 	 * lock up on an APIC access.
+	 *
+	 * Since CPU0 is not woken up by INIT, it doesn't wait for the IPI.
 	 */
-	if (apic->wait_for_init_deassert)
+	cpuid = smp_processor_id();
+	if (apic->wait_for_init_deassert && cpuid != 0)
 		apic->wait_for_init_deassert(&init_deasserted);
 
 	/*
 	 * (This works even if the APIC is not enabled.)
 	 */
 	phys_id = read_apic_id();
-	cpuid = smp_processor_id();
 	if (cpumask_test_cpu(cpuid, cpu_callin_mask)) {
 		panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
 			phys_id, cpuid);
@@ -230,6 +232,8 @@ static void __cpuinit smp_callin(void)
 	cpumask_set_cpu(cpuid, cpu_callin_mask);
 }
 
+static int cpu0_logical_apicid;
+static int enable_start_cpu0;
 /*
  * Activate a secondary processor.
  */
@@ -245,6 +249,8 @@ notrace static void __cpuinit start_secondary(void *unused)
 	preempt_disable();
 	smp_callin();
 
+	enable_start_cpu0 = 0;
+
 #ifdef CONFIG_X86_32
 	/* switch away from the initial page table */
 	load_cr3(swapper_pg_dir);
@@ -281,19 +287,30 @@ notrace static void __cpuinit start_secondary(void *unused)
 	cpu_idle();
 }
 
+void __init smp_store_boot_cpu_info(void)
+{
+	int id = 0;		/* CPU 0 */
+	struct cpuinfo_x86 *c = &cpu_data(id);
+
+	*c = boot_cpu_data;
+	c->cpu_index = id;
+}
+
 /*
  * The bootstrap kernel entry code has set these up. Save them for
  * a given CPU
  */
-
 void __cpuinit smp_store_cpu_info(int id)
 {
 	struct cpuinfo_x86 *c = &cpu_data(id);
 
 	*c = boot_cpu_data;
 	c->cpu_index = id;
-	if (id != 0)
-		identify_secondary_cpu(c);
+	/*
+	 * During boot time, CPU0 has this setup already. Save the info when
+	 * bringing up an AP or offlined CPU0.
+	 */
+	identify_secondary_cpu(c);
 }
 
 static bool __cpuinit
@@ -483,7 +500,7 @@ void __inquire_remote_apic(int apicid)
  * won't ... remember to clear down the APIC, etc later.
  */
 int __cpuinit
-wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
+wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
 {
 	unsigned long send_status, accept_status = 0;
 	int maxlvt;
@@ -491,7 +508,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
 	/* Target chip */
 	/* Boot on the stack */
 	/* Kick the second */
-	apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid);
+	apic_icr_write(APIC_DM_NMI | apic->dest_logical, apicid);
 
 	pr_debug("Waiting for send to finish...\n");
 	send_status = safe_apic_wait_icr_idle();
@@ -651,6 +668,63 @@ static void __cpuinit announce_cpu(int cpu, int apicid)
 			node, cpu, apicid);
 }
 
+static int wakeup_cpu0_nmi(unsigned int cmd, struct pt_regs *regs)
+{
+	int cpu;
+
+	cpu = smp_processor_id();
+	if (cpu == 0 && !cpu_online(cpu) && enable_start_cpu0)
+		return NMI_HANDLED;
+
+	return NMI_DONE;
+}
+
+/*
+ * Wake up an AP by an INIT, INIT, STARTUP sequence.
+ *
+ * Instead of waiting for STARTUP after INITs, the BSP would execute the
+ * BIOS boot-strap code, which is not the desired behavior for waking up
+ * the BSP. To avoid the boot-strap code, wake up CPU0 by NMI instead.
+ *
+ * This works to wake up a soft offlined CPU0 only. If CPU0 is hard offlined
+ * (i.e. physically hot removed and then hot added), NMI won't wake it up.
+ * We'll change this code in the future to wake up hard offlined CPU0 if a
+ * real platform and request are available.
+ */
+static int __cpuinit
+wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid,
+			int *cpu0_nmi_registered)
+{
+	int id;
+	int boot_error;
+
+	/*
+	 * Wake up an AP by an INIT, INIT, STARTUP sequence.
+	 */
+	if (cpu)
+		return wakeup_secondary_cpu_via_init(apicid, start_ip);
+
+	/*
+	 * Wake up the BSP by NMI.
+	 *
+	 * Register an NMI handler to help wake up CPU0.
+	 */
+	boot_error = register_nmi_handler(NMI_LOCAL,
+					  wakeup_cpu0_nmi, 0, "wake_cpu0");
+
+	if (!boot_error) {
+		enable_start_cpu0 = 1;
+		*cpu0_nmi_registered = 1;
+		if (apic->dest_logical == APIC_DEST_LOGICAL)
+			id = cpu0_logical_apicid;
+		else
+			id = apicid;
+		boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip);
+	}
+
+	return boot_error;
+}
+
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
  * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -666,6 +740,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 
 	unsigned long boot_error = 0;
 	int timeout;
+	int cpu0_nmi_registered = 0;
 
 	/* Just in case we booted with a single CPU. */
 	alternatives_enable_smp();
@@ -713,13 +788,16 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 	}
 
 	/*
-	 * Kick the secondary CPU. Use the method in the APIC driver
-	 * if it's defined - or use an INIT boot APIC message otherwise:
+	 * Wake up a CPU in different cases:
+	 * - Use the method in the APIC driver if it's defined.
+	 * Otherwise,
+	 * - Use an INIT boot APIC message for APs or NMI for the BSP.
 	 */
 	if (apic->wakeup_secondary_cpu)
 		boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
 	else
-		boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
+		boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
+						     &cpu0_nmi_registered);
 
 	if (!boot_error) {
 		/*
@@ -784,6 +862,13 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 		 */
 		smpboot_restore_warm_reset_vector();
 	}
+	/*
+	 * Clean up the NMI handler. Do this after the callin and callout sync
+	 * to avoid the impact of a possibly long unregister time.
+	 */
+	if (cpu0_nmi_registered)
+		unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");
+
 	return boot_error;
 }
 
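The BSP wake-up path above is a handshake: the waker sets enable_start_cpu0, sends an NMI, and the parked CPU0 notices the flag once the NMI kicks it out of hlt/mwait. A rough userspace analogy using a signal in place of the NMI (illustrative only; none of these names are kernel API, and the classic pause() race is ignored for brevity):

    /* Userspace analogy of the NMI wake-up: SIGUSR1 plays the NMI. */
    #include <pthread.h>
    #include <signal.h>
    #include <stdio.h>
    #include <unistd.h>

    static volatile sig_atomic_t enable_start_cpu0;

    /* Like wakeup_cpu0_nmi(): the handler only interrupts the halt. */
    static void nmi_like_handler(int sig)
    {
            (void)sig;
    }

    /* Like hlt_play_dead(): halt, then check whether a wake-up was armed. */
    static void *play_dead(void *arg)
    {
            (void)arg;
            while (1) {
                    pause();                        /* stand-in for native_halt() */
                    if (enable_start_cpu0) {        /* wakeup_cpu0() check */
                            printf("CPU0: waking up via start_cpu0()\n");
                            return NULL;
                    }
            }
    }

    int main(void)
    {
            pthread_t cpu0;

            signal(SIGUSR1, nmi_like_handler);
            pthread_create(&cpu0, NULL, play_dead, NULL);
            sleep(1);                       /* let "CPU0" park itself */

            enable_start_cpu0 = 1;          /* arm the wake-up ... */
            pthread_kill(cpu0, SIGUSR1);    /* ... and "send the NMI" */
            pthread_join(cpu0, NULL);
            return 0;
    }

Build with -pthread. Note the real code sets the flag before sending the NMI for the same reason this sketch does: the parked CPU re-checks the flag after every halt, so an NMI with the flag clear is simply absorbed.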
@@ -797,7 +882,7 @@ int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 
 	pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu);
 
-	if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
+	if (apicid == BAD_APICID ||
 	    !physid_isset(apicid, phys_cpu_present_map) ||
 	    !apic->apic_id_valid(apicid)) {
 		pr_err("%s: bad cpu %d\n", __func__, cpu);
@@ -995,7 +1080,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	/*
 	 * Setup boot CPU information
 	 */
-	smp_store_cpu_info(0); /* Final full version of the data */
+	smp_store_boot_cpu_info(); /* Final full version of the data */
 	cpumask_copy(cpu_callin_mask, cpumask_of(0));
 	mb();
 
@@ -1031,6 +1116,11 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	 */
 	setup_local_APIC();
 
+	if (x2apic_mode)
+		cpu0_logical_apicid = apic_read(APIC_LDR);
+	else
+		cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
+
 	/*
 	 * Enable IO APIC before setting up error vector
 	 */
@@ -1219,19 +1309,6 @@ void cpu_disable_common(void)
 
 int native_cpu_disable(void)
 {
-	int cpu = smp_processor_id();
-
-	/*
-	 * Perhaps use cpufreq to drop frequency, but that could go
-	 * into generic code.
-	 *
-	 * We won't take down the boot processor on i386 due to some
-	 * interrupts only being able to be serviced by the BSP.
-	 * Especially so if we're not using an IOAPIC -zwane
-	 */
-	if (cpu == 0)
-		return -EBUSY;
-
 	clear_local_APIC();
 
 	cpu_disable_common();
@@ -1271,6 +1348,14 @@ void play_dead_common(void)
 	local_irq_disable();
 }
 
+static bool wakeup_cpu0(void)
+{
+	if (smp_processor_id() == 0 && enable_start_cpu0)
+		return true;
+
+	return false;
+}
+
 /*
  * We need to flush the caches before going to sleep, lest we have
  * dirty data in our caches when we come back up.
@@ -1334,6 +1419,11 @@ static inline void mwait_play_dead(void)
 		__monitor(mwait_ptr, 0, 0);
 		mb();
 		__mwait(eax, 0);
+		/*
+		 * If NMI wants to wake up CPU0, start CPU0.
+		 */
+		if (wakeup_cpu0())
+			start_cpu0();
 	}
 }
 
@@ -1344,6 +1434,11 @@ static inline void hlt_play_dead(void)
 
 	while (1) {
 		native_halt();
+		/*
+		 * If NMI wants to wake up CPU0, start CPU0.
+		 */
+		if (wakeup_cpu0())
+			start_cpu0();
 	}
 }
 
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index 76ee97709a00..6e60b5fe2244 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -30,23 +30,110 @@
 #include <linux/mmzone.h>
 #include <linux/init.h>
 #include <linux/smp.h>
+#include <linux/irq.h>
 #include <asm/cpu.h>
 
 static DEFINE_PER_CPU(struct x86_cpu, cpu_devices);
 
 #ifdef CONFIG_HOTPLUG_CPU
+
+#ifdef CONFIG_BOOTPARAM_HOTPLUG_CPU0
+static int cpu0_hotpluggable = 1;
+#else
+static int cpu0_hotpluggable;
+static int __init enable_cpu0_hotplug(char *str)
+{
+	cpu0_hotpluggable = 1;
+	return 1;
+}
+
+__setup("cpu0_hotplug", enable_cpu0_hotplug);
+#endif
+
+#ifdef CONFIG_DEBUG_HOTPLUG_CPU0
+/*
+ * This function offlines a CPU as early as possible and allows userspace to
+ * boot up without the CPU. The CPU can be onlined back by the user after
+ * boot.
+ *
+ * This is only called for debugging the CPU offline/online feature.
+ */
+int __ref _debug_hotplug_cpu(int cpu, int action)
+{
+	struct device *dev = get_cpu_device(cpu);
+	int ret;
+
+	if (!cpu_is_hotpluggable(cpu))
+		return -EINVAL;
+
+	cpu_hotplug_driver_lock();
+
+	switch (action) {
+	case 0:
+		ret = cpu_down(cpu);
+		if (!ret) {
+			pr_info("CPU %u is now offline\n", cpu);
+			kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
+		} else
+			pr_debug("Can't offline CPU%d.\n", cpu);
+		break;
+	case 1:
+		ret = cpu_up(cpu);
+		if (!ret)
+			kobject_uevent(&dev->kobj, KOBJ_ONLINE);
+		else
+			pr_debug("Can't online CPU%d.\n", cpu);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	cpu_hotplug_driver_unlock();
+
+	return ret;
+}
+
+static int __init debug_hotplug_cpu(void)
+{
+	_debug_hotplug_cpu(0, 0);
+	return 0;
+}
+
+late_initcall_sync(debug_hotplug_cpu);
+#endif /* CONFIG_DEBUG_HOTPLUG_CPU0 */
+
 int __ref arch_register_cpu(int num)
 {
+	struct cpuinfo_x86 *c = &cpu_data(num);
+
+	/*
+	 * Currently CPU0 is only hotpluggable on Intel platforms. Other
+	 * vendors can add hotplug support later.
+	 */
+	if (c->x86_vendor != X86_VENDOR_INTEL)
+		cpu0_hotpluggable = 0;
+
 	/*
-	 * CPU0 cannot be offlined due to several
-	 * restrictions and assumptions in kernel. This basically
-	 * doesn't add a control file, one cannot attempt to offline
-	 * BSP.
+	 * Two known BSP/CPU0 dependencies: Resume from suspend/hibernate
+	 * depends on the BSP. PIC interrupts depend on the BSP.
 	 *
-	 * Also certain PCI quirks require not to enable hotplug control
-	 * for all CPU's.
+	 * If the BSP dependencies are under control, one can tell the kernel
+	 * to enable BSP hotplug. This basically adds a control file so that
+	 * one can attempt to offline the BSP.
 	 */
-	if (num)
+	if (num == 0 && cpu0_hotpluggable) {
+		unsigned int irq;
+		/*
+		 * We won't take down the boot processor on i386 if some
+		 * interrupts can only be serviced by the BSP in PIC mode.
+		 */
+		for_each_active_irq(irq) {
+			if (!IO_APIC_IRQ(irq) && irq_has_action(irq)) {
+				cpu0_hotpluggable = 0;
+				break;
+			}
+		}
+	}
+	if (num || cpu0_hotpluggable)
 		per_cpu(cpu_devices, num).cpu.hotpluggable = 1;
 
 	return register_cpu(&per_cpu(cpu_devices, num).cpu, num);
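Since arch_register_cpu() only marks CPU0 hotpluggable when every check passes, userspace can probe the outcome simply by testing for the control file, as the cpu-hotplug.txt Q&A describes. A small sketch:

    /* Probe whether CPU0 got an "online" control file at registration. */
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            const char *path = "/sys/devices/system/cpu/cpu0/online";

            if (access(path, F_OK) == 0)
                    printf("CPU0 is hotpluggable on this boot\n");
            else
                    printf("CPU0 is not hotpluggable (no %s)\n", path);
            return 0;
    }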
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 218cdb16163c..120cee1c3f8d 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -21,6 +21,7 @@
 #include <asm/suspend.h>
 #include <asm/debugreg.h>
 #include <asm/fpu-internal.h> /* pcntxt_mask */
+#include <asm/cpu.h>
 
 #ifdef CONFIG_X86_32
 static struct saved_context saved_context;
@@ -237,3 +238,84 @@ void restore_processor_state(void)
 #ifdef CONFIG_X86_32
 EXPORT_SYMBOL(restore_processor_state);
 #endif
+
+/*
+ * When bsp_check() is called in hibernate and suspend, cpu hotplug
+ * is already disabled. So it's unnecessary to handle a race condition
+ * between cpumask query and cpu hotplug.
+ */
+static int bsp_check(void)
+{
+	if (cpumask_first(cpu_online_mask) != 0) {
+		pr_warn("CPU0 is offline.\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static int bsp_pm_callback(struct notifier_block *nb, unsigned long action,
+			   void *ptr)
+{
+	int ret = 0;
+
+	switch (action) {
+	case PM_SUSPEND_PREPARE:
+	case PM_HIBERNATION_PREPARE:
+		ret = bsp_check();
+		break;
+#ifdef CONFIG_DEBUG_HOTPLUG_CPU0
+	case PM_RESTORE_PREPARE:
+		/*
+		 * When the system resumes from hibernation, online CPU0
+		 * because
+		 * 1. it's required for resume, and
+		 * 2. the CPU was online before hibernation.
+		 */
+		if (!cpu_online(0))
+			_debug_hotplug_cpu(0, 1);
+		break;
+	case PM_POST_RESTORE:
+		/*
+		 * When a resume really happens, this code won't be called.
+		 *
+		 * This code is called only when userspace hibernation
+		 * software prepares the snapshot device during boot time.
+		 * So we just call _debug_hotplug_cpu() to restore CPU0's
+		 * state prior to preparing the snapshot device.
+		 *
+		 * This works for the normal boot case in our CPU0 hotplug
+		 * debug mode, i.e. CPU0 is offline and user-mode hibernation
+		 * software initializes during boot time.
+		 *
+		 * If CPU0 is online and a user application accesses the
+		 * snapshot device after boot time, this will offline CPU0
+		 * and the user may see a different CPU0 state before and
+		 * after accessing the snapshot device. But hopefully this
+		 * is not a case when the user is debugging CPU0 hotplug.
+		 * Even if users hit this case, they can easily online CPU0
+		 * again.
+		 *
+		 * To simplify this debug code, we only consider the normal
+		 * boot case. Otherwise we would need to remember CPU0's
+		 * state, restore to that state, resolve race conditions,
+		 * etc.
+		 */
+		_debug_hotplug_cpu(0, 0);
+		break;
+#endif
+	default:
+		break;
+	}
+	return notifier_from_errno(ret);
+}
+
+static int __init bsp_pm_check_init(void)
+{
+	/*
+	 * Set this bsp_pm_callback to a lower priority than
+	 * cpu_hotplug_pm_callback, so that cpu_hotplug_pm_callback is
+	 * called earlier to disable cpu hotplug before the bsp online
+	 * check.
+	 */
+	pm_notifier(bsp_pm_callback, -INT_MAX);
+	return 0;
+}
+
+core_initcall(bsp_pm_check_init);
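The -INT_MAX priority is what sequences the two PM notifiers: kernel/cpu.c registers cpu_hotplug_pm_callback at priority 0, so it runs first and freezes hotplug before bsp_check() inspects cpu_online_mask. A toy model of priority-ordered notifier dispatch (a sketch, not the kernel's implementation):

    /* Toy model: PM callbacks run in descending priority order. */
    #include <limits.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct notifier {
            const char *name;
            int prio;
    };

    static int by_prio_desc(const void *a, const void *b)
    {
            const struct notifier *na = a, *nb = b;

            return (nb->prio > na->prio) - (nb->prio < na->prio);
    }

    int main(void)
    {
            struct notifier chain[] = {
                    { "bsp_pm_callback", -INT_MAX },
                    { "cpu_hotplug_pm_callback", 0 },
            };

            qsort(chain, 2, sizeof(chain[0]), by_prio_desc);
            for (int i = 0; i < 2; i++)
                    printf("%d: %s (prio %d)\n", i, chain[i].name,
                           chain[i].prio);
            return 0;
    }

Running it prints cpu_hotplug_pm_callback first, mirroring the ordering the comments in both files describe.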
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f45657f1eb8e..3046a503242c 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -603,6 +603,11 @@ cpu_hotplug_pm_callback(struct notifier_block *nb,
 
 static int __init cpu_hotplug_pm_sync_init(void)
 {
+	/*
+	 * cpu_hotplug_pm_callback has a higher priority than x86's
+	 * bsp_pm_callback, which depends on cpu_hotplug_pm_callback
+	 * having disabled cpu hotplug to avoid cpu hotplug races.
+	 */
 	pm_notifier(cpu_hotplug_pm_callback, 0);
 	return 0;
 }