diff options
Diffstat (limited to 'arch/x86/kernel/smpboot.c')
-rw-r--r-- | arch/x86/kernel/smpboot.c | 212 |
1 files changed, 75 insertions, 137 deletions
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 3e1cecedde42..f35c2d8016ac 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c | |||
@@ -59,7 +59,6 @@ | |||
59 | #include <asm/pgtable.h> | 59 | #include <asm/pgtable.h> |
60 | #include <asm/tlbflush.h> | 60 | #include <asm/tlbflush.h> |
61 | #include <asm/mtrr.h> | 61 | #include <asm/mtrr.h> |
62 | #include <asm/nmi.h> | ||
63 | #include <asm/vmi.h> | 62 | #include <asm/vmi.h> |
64 | #include <asm/genapic.h> | 63 | #include <asm/genapic.h> |
65 | #include <linux/mc146818rtc.h> | 64 | #include <linux/mc146818rtc.h> |
@@ -68,22 +67,6 @@ | |||
68 | #include <mach_wakecpu.h> | 67 | #include <mach_wakecpu.h> |
69 | #include <smpboot_hooks.h> | 68 | #include <smpboot_hooks.h> |
70 | 69 | ||
71 | /* | ||
72 | * FIXME: For x86_64, those are defined in other files. But moving them here, | ||
73 | * would make the setup areas dependent on smp, which is a loss. When we | ||
74 | * integrate apic between arches, we can probably do a better job, but | ||
75 | * right now, they'll stay here -- glommer | ||
76 | */ | ||
77 | |||
78 | /* which logical CPU number maps to which CPU (physical APIC ID) */ | ||
79 | u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata = | ||
80 | { [0 ... NR_CPUS-1] = BAD_APICID }; | ||
81 | void *x86_cpu_to_apicid_early_ptr; | ||
82 | |||
83 | u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata | ||
84 | = { [0 ... NR_CPUS-1] = BAD_APICID }; | ||
85 | void *x86_bios_cpu_apicid_early_ptr; | ||
86 | |||
87 | #ifdef CONFIG_X86_32 | 70 | #ifdef CONFIG_X86_32 |
88 | u8 apicid_2_node[MAX_APICID]; | 71 | u8 apicid_2_node[MAX_APICID]; |
89 | static int low_mappings; | 72 | static int low_mappings; |
@@ -198,13 +181,12 @@ static void map_cpu_to_logical_apicid(void) | |||
198 | map_cpu_to_node(cpu, node); | 181 | map_cpu_to_node(cpu, node); |
199 | } | 182 | } |
200 | 183 | ||
201 | static void unmap_cpu_to_logical_apicid(int cpu) | 184 | void numa_remove_cpu(int cpu) |
202 | { | 185 | { |
203 | cpu_2_logical_apicid[cpu] = BAD_APICID; | 186 | cpu_2_logical_apicid[cpu] = BAD_APICID; |
204 | unmap_cpu_to_node(cpu); | 187 | unmap_cpu_to_node(cpu); |
205 | } | 188 | } |
206 | #else | 189 | #else |
207 | #define unmap_cpu_to_logical_apicid(cpu) do {} while (0) | ||
208 | #define map_cpu_to_logical_apicid() do {} while (0) | 190 | #define map_cpu_to_logical_apicid() do {} while (0) |
209 | #endif | 191 | #endif |
210 | 192 | ||
@@ -346,15 +328,8 @@ static void __cpuinit start_secondary(void *unused) | |||
346 | * smp_call_function(). | 328 | * smp_call_function(). |
347 | */ | 329 | */ |
348 | lock_ipi_call_lock(); | 330 | lock_ipi_call_lock(); |
349 | #ifdef CONFIG_X86_64 | 331 | #ifdef CONFIG_X86_IO_APIC |
350 | spin_lock(&vector_lock); | 332 | setup_vector_irq(smp_processor_id()); |
351 | |||
352 | /* Setup the per cpu irq handling data structures */ | ||
353 | __setup_vector_irq(smp_processor_id()); | ||
354 | /* | ||
355 | * Allow the master to continue. | ||
356 | */ | ||
357 | spin_unlock(&vector_lock); | ||
358 | #endif | 333 | #endif |
359 | cpu_set(smp_processor_id(), cpu_online_map); | 334 | cpu_set(smp_processor_id(), cpu_online_map); |
360 | unlock_ipi_call_lock(); | 335 | unlock_ipi_call_lock(); |
@@ -366,31 +341,8 @@ static void __cpuinit start_secondary(void *unused) | |||
366 | cpu_idle(); | 341 | cpu_idle(); |
367 | } | 342 | } |
368 | 343 | ||
369 | #ifdef CONFIG_X86_32 | ||
370 | /* | ||
371 | * Everything has been set up for the secondary | ||
372 | * CPUs - they just need to reload everything | ||
373 | * from the task structure | ||
374 | * This function must not return. | ||
375 | */ | ||
376 | void __devinit initialize_secondary(void) | ||
377 | { | ||
378 | /* | ||
379 | * We don't actually need to load the full TSS, | ||
380 | * basically just the stack pointer and the ip. | ||
381 | */ | ||
382 | |||
383 | asm volatile( | ||
384 | "movl %0,%%esp\n\t" | ||
385 | "jmp *%1" | ||
386 | : | ||
387 | :"m" (current->thread.sp), "m" (current->thread.ip)); | ||
388 | } | ||
389 | #endif | ||
390 | |||
391 | static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) | 344 | static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) |
392 | { | 345 | { |
393 | #ifdef CONFIG_X86_32 | ||
394 | /* | 346 | /* |
395 | * Mask B, Pentium, but not Pentium MMX | 347 | * Mask B, Pentium, but not Pentium MMX |
396 | */ | 348 | */ |
@@ -440,7 +392,6 @@ static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) | |||
440 | 392 | ||
441 | valid_k7: | 393 | valid_k7: |
442 | ; | 394 | ; |
443 | #endif | ||
444 | } | 395 | } |
445 | 396 | ||
446 | static void __cpuinit smp_checks(void) | 397 | static void __cpuinit smp_checks(void) |
@@ -555,23 +506,6 @@ cpumask_t cpu_coregroup_map(int cpu) | |||
555 | return c->llc_shared_map; | 506 | return c->llc_shared_map; |
556 | } | 507 | } |
557 | 508 | ||
558 | #ifdef CONFIG_X86_32 | ||
559 | /* | ||
560 | * We are called very early to get the low memory for the | ||
561 | * SMP bootup trampoline page. | ||
562 | */ | ||
563 | void __init smp_alloc_memory(void) | ||
564 | { | ||
565 | trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE); | ||
566 | /* | ||
567 | * Has to be in very low memory so we can execute | ||
568 | * real-mode AP code. | ||
569 | */ | ||
570 | if (__pa(trampoline_base) >= 0x9F000) | ||
571 | BUG(); | ||
572 | } | ||
573 | #endif | ||
574 | |||
575 | static void impress_friends(void) | 509 | static void impress_friends(void) |
576 | { | 510 | { |
577 | int cpu; | 511 | int cpu; |
@@ -748,11 +682,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | |||
748 | * target processor state. | 682 | * target processor state. |
749 | */ | 683 | */ |
750 | startup_ipi_hook(phys_apicid, (unsigned long) start_secondary, | 684 | startup_ipi_hook(phys_apicid, (unsigned long) start_secondary, |
751 | #ifdef CONFIG_X86_64 | ||
752 | (unsigned long)init_rsp); | ||
753 | #else | ||
754 | (unsigned long)stack_start.sp); | 685 | (unsigned long)stack_start.sp); |
755 | #endif | ||
756 | 686 | ||
757 | /* | 687 | /* |
758 | * Run STARTUP IPI loop. | 688 | * Run STARTUP IPI loop. |
@@ -832,6 +762,45 @@ static void __cpuinit do_fork_idle(struct work_struct *work) | |||
832 | complete(&c_idle->done); | 762 | complete(&c_idle->done); |
833 | } | 763 | } |
834 | 764 | ||
765 | #ifdef CONFIG_X86_64 | ||
766 | /* | ||
767 | * Allocate node local memory for the AP pda. | ||
768 | * | ||
769 | * Must be called after the _cpu_pda pointer table is initialized. | ||
770 | */ | ||
771 | static int __cpuinit get_local_pda(int cpu) | ||
772 | { | ||
773 | struct x8664_pda *oldpda, *newpda; | ||
774 | unsigned long size = sizeof(struct x8664_pda); | ||
775 | int node = cpu_to_node(cpu); | ||
776 | |||
777 | if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem) | ||
778 | return 0; | ||
779 | |||
780 | oldpda = cpu_pda(cpu); | ||
781 | newpda = kmalloc_node(size, GFP_ATOMIC, node); | ||
782 | if (!newpda) { | ||
783 | printk(KERN_ERR "Could not allocate node local PDA " | ||
784 | "for CPU %d on node %d\n", cpu, node); | ||
785 | |||
786 | if (oldpda) | ||
787 | return 0; /* have a usable pda */ | ||
788 | else | ||
789 | return -1; | ||
790 | } | ||
791 | |||
792 | if (oldpda) { | ||
793 | memcpy(newpda, oldpda, size); | ||
794 | if (!after_bootmem) | ||
795 | free_bootmem((unsigned long)oldpda, size); | ||
796 | } | ||
797 | |||
798 | newpda->in_bootmem = 0; | ||
799 | cpu_pda(cpu) = newpda; | ||
800 | return 0; | ||
801 | } | ||
802 | #endif /* CONFIG_X86_64 */ | ||
803 | |||
835 | static int __cpuinit do_boot_cpu(int apicid, int cpu) | 804 | static int __cpuinit do_boot_cpu(int apicid, int cpu) |
836 | /* | 805 | /* |
837 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad | 806 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad |
@@ -848,28 +817,14 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) | |||
848 | .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), | 817 | .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), |
849 | }; | 818 | }; |
850 | INIT_WORK(&c_idle.work, do_fork_idle); | 819 | INIT_WORK(&c_idle.work, do_fork_idle); |
851 | #ifdef CONFIG_X86_64 | ||
852 | /* allocate memory for gdts of secondary cpus. Hotplug is considered */ | ||
853 | if (!cpu_gdt_descr[cpu].address && | ||
854 | !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { | ||
855 | printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu); | ||
856 | return -1; | ||
857 | } | ||
858 | 820 | ||
821 | #ifdef CONFIG_X86_64 | ||
859 | /* Allocate node local memory for AP pdas */ | 822 | /* Allocate node local memory for AP pdas */ |
860 | if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) { | 823 | if (cpu > 0) { |
861 | struct x8664_pda *newpda, *pda; | 824 | boot_error = get_local_pda(cpu); |
862 | int node = cpu_to_node(cpu); | 825 | if (boot_error) |
863 | pda = cpu_pda(cpu); | 826 | goto restore_state; |
864 | newpda = kmalloc_node(sizeof(struct x8664_pda), GFP_ATOMIC, | 827 | /* if can't get pda memory, can't start cpu */ |
865 | node); | ||
866 | if (newpda) { | ||
867 | memcpy(newpda, pda, sizeof(struct x8664_pda)); | ||
868 | cpu_pda(cpu) = newpda; | ||
869 | } else | ||
870 | printk(KERN_ERR | ||
871 | "Could not allocate node local PDA for CPU %d on node %d\n", | ||
872 | cpu, node); | ||
873 | } | 828 | } |
874 | #endif | 829 | #endif |
875 | 830 | ||
@@ -905,18 +860,15 @@ do_rest: | |||
905 | #ifdef CONFIG_X86_32 | 860 | #ifdef CONFIG_X86_32 |
906 | per_cpu(current_task, cpu) = c_idle.idle; | 861 | per_cpu(current_task, cpu) = c_idle.idle; |
907 | init_gdt(cpu); | 862 | init_gdt(cpu); |
908 | early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); | ||
909 | c_idle.idle->thread.ip = (unsigned long) start_secondary; | ||
910 | /* Stack for startup_32 can be just as for start_secondary onwards */ | 863 | /* Stack for startup_32 can be just as for start_secondary onwards */ |
911 | stack_start.sp = (void *) c_idle.idle->thread.sp; | ||
912 | irq_ctx_init(cpu); | 864 | irq_ctx_init(cpu); |
913 | #else | 865 | #else |
914 | cpu_pda(cpu)->pcurrent = c_idle.idle; | 866 | cpu_pda(cpu)->pcurrent = c_idle.idle; |
915 | init_rsp = c_idle.idle->thread.sp; | ||
916 | load_sp0(&per_cpu(init_tss, cpu), &c_idle.idle->thread); | ||
917 | initial_code = (unsigned long)start_secondary; | ||
918 | clear_tsk_thread_flag(c_idle.idle, TIF_FORK); | 867 | clear_tsk_thread_flag(c_idle.idle, TIF_FORK); |
919 | #endif | 868 | #endif |
869 | early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); | ||
870 | initial_code = (unsigned long)start_secondary; | ||
871 | stack_start.sp = (void *) c_idle.idle->thread.sp; | ||
920 | 872 | ||
921 | /* start_ip had better be page-aligned! */ | 873 | /* start_ip had better be page-aligned! */ |
922 | start_ip = setup_trampoline(); | 874 | start_ip = setup_trampoline(); |
@@ -987,13 +939,12 @@ do_rest: | |||
987 | inquire_remote_apic(apicid); | 939 | inquire_remote_apic(apicid); |
988 | } | 940 | } |
989 | } | 941 | } |
990 | |||
991 | if (boot_error) { | ||
992 | /* Try to put things back the way they were before ... */ | ||
993 | unmap_cpu_to_logical_apicid(cpu); | ||
994 | #ifdef CONFIG_X86_64 | 942 | #ifdef CONFIG_X86_64 |
995 | clear_node_cpumask(cpu); /* was set by numa_add_cpu */ | 943 | restore_state: |
996 | #endif | 944 | #endif |
945 | if (boot_error) { | ||
946 | /* Try to put things back the way they were before ... */ | ||
947 | numa_remove_cpu(cpu); /* was set by numa_add_cpu */ | ||
997 | cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */ | 948 | cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */ |
998 | cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ | 949 | cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ |
999 | cpu_clear(cpu, cpu_present_map); | 950 | cpu_clear(cpu, cpu_present_map); |
@@ -1087,14 +1038,12 @@ static __init void disable_smp(void) | |||
1087 | { | 1038 | { |
1088 | cpu_present_map = cpumask_of_cpu(0); | 1039 | cpu_present_map = cpumask_of_cpu(0); |
1089 | cpu_possible_map = cpumask_of_cpu(0); | 1040 | cpu_possible_map = cpumask_of_cpu(0); |
1090 | #ifdef CONFIG_X86_32 | ||
1091 | smpboot_clear_io_apic_irqs(); | 1041 | smpboot_clear_io_apic_irqs(); |
1092 | #endif | 1042 | |
1093 | if (smp_found_config) | 1043 | if (smp_found_config) |
1094 | phys_cpu_present_map = | 1044 | physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); |
1095 | physid_mask_of_physid(boot_cpu_physical_apicid); | ||
1096 | else | 1045 | else |
1097 | phys_cpu_present_map = physid_mask_of_physid(0); | 1046 | physid_set_mask_of_physid(0, &phys_cpu_present_map); |
1098 | map_cpu_to_logical_apicid(); | 1047 | map_cpu_to_logical_apicid(); |
1099 | cpu_set(0, per_cpu(cpu_sibling_map, 0)); | 1048 | cpu_set(0, per_cpu(cpu_sibling_map, 0)); |
1100 | cpu_set(0, per_cpu(cpu_core_map, 0)); | 1049 | cpu_set(0, per_cpu(cpu_core_map, 0)); |
@@ -1157,12 +1106,12 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
1157 | * If SMP should be disabled, then really disable it! | 1106 | * If SMP should be disabled, then really disable it! |
1158 | */ | 1107 | */ |
1159 | if (!max_cpus) { | 1108 | if (!max_cpus) { |
1160 | printk(KERN_INFO "SMP mode deactivated," | 1109 | printk(KERN_INFO "SMP mode deactivated.\n"); |
1161 | "forcing use of dummy APIC emulation.\n"); | ||
1162 | smpboot_clear_io_apic(); | 1110 | smpboot_clear_io_apic(); |
1163 | #ifdef CONFIG_X86_32 | 1111 | |
1112 | localise_nmi_watchdog(); | ||
1113 | |||
1164 | connect_bsp_APIC(); | 1114 | connect_bsp_APIC(); |
1165 | #endif | ||
1166 | setup_local_APIC(); | 1115 | setup_local_APIC(); |
1167 | end_local_APIC_setup(); | 1116 | end_local_APIC_setup(); |
1168 | return -1; | 1117 | return -1; |
@@ -1190,7 +1139,6 @@ static void __init smp_cpu_index_default(void) | |||
1190 | void __init native_smp_prepare_cpus(unsigned int max_cpus) | 1139 | void __init native_smp_prepare_cpus(unsigned int max_cpus) |
1191 | { | 1140 | { |
1192 | preempt_disable(); | 1141 | preempt_disable(); |
1193 | nmi_watchdog_default(); | ||
1194 | smp_cpu_index_default(); | 1142 | smp_cpu_index_default(); |
1195 | current_cpu_data = boot_cpu_data; | 1143 | current_cpu_data = boot_cpu_data; |
1196 | cpu_callin_map = cpumask_of_cpu(0); | 1144 | cpu_callin_map = cpumask_of_cpu(0); |
@@ -1217,9 +1165,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1217 | } | 1165 | } |
1218 | preempt_enable(); | 1166 | preempt_enable(); |
1219 | 1167 | ||
1220 | #ifdef CONFIG_X86_32 | ||
1221 | connect_bsp_APIC(); | 1168 | connect_bsp_APIC(); |
1222 | #endif | 1169 | |
1223 | /* | 1170 | /* |
1224 | * Switch from PIC to APIC mode. | 1171 | * Switch from PIC to APIC mode. |
1225 | */ | 1172 | */ |
@@ -1257,8 +1204,8 @@ void __init native_smp_prepare_boot_cpu(void) | |||
1257 | int me = smp_processor_id(); | 1204 | int me = smp_processor_id(); |
1258 | #ifdef CONFIG_X86_32 | 1205 | #ifdef CONFIG_X86_32 |
1259 | init_gdt(me); | 1206 | init_gdt(me); |
1260 | switch_to_new_gdt(); | ||
1261 | #endif | 1207 | #endif |
1208 | switch_to_new_gdt(); | ||
1262 | /* already set me in cpu_online_map in boot_cpu_init() */ | 1209 | /* already set me in cpu_online_map in boot_cpu_init() */ |
1263 | cpu_set(me, cpu_callout_map); | 1210 | cpu_set(me, cpu_callout_map); |
1264 | per_cpu(cpu_state, me) = CPU_ONLINE; | 1211 | per_cpu(cpu_state, me) = CPU_ONLINE; |
@@ -1278,23 +1225,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
1278 | 1225 | ||
1279 | #ifdef CONFIG_HOTPLUG_CPU | 1226 | #ifdef CONFIG_HOTPLUG_CPU |
1280 | 1227 | ||
1281 | # ifdef CONFIG_X86_32 | ||
1282 | void cpu_exit_clear(void) | ||
1283 | { | ||
1284 | int cpu = raw_smp_processor_id(); | ||
1285 | |||
1286 | idle_task_exit(); | ||
1287 | |||
1288 | cpu_uninit(); | ||
1289 | irq_ctx_exit(cpu); | ||
1290 | |||
1291 | cpu_clear(cpu, cpu_callout_map); | ||
1292 | cpu_clear(cpu, cpu_callin_map); | ||
1293 | |||
1294 | unmap_cpu_to_logical_apicid(cpu); | ||
1295 | } | ||
1296 | # endif /* CONFIG_X86_32 */ | ||
1297 | |||
1298 | static void remove_siblinginfo(int cpu) | 1228 | static void remove_siblinginfo(int cpu) |
1299 | { | 1229 | { |
1300 | int sibling; | 1230 | int sibling; |
@@ -1348,12 +1278,20 @@ __init void prefill_possible_map(void) | |||
1348 | int i; | 1278 | int i; |
1349 | int possible; | 1279 | int possible; |
1350 | 1280 | ||
1281 | /* no processor from mptable or madt */ | ||
1282 | if (!num_processors) | ||
1283 | num_processors = 1; | ||
1284 | |||
1285 | #ifdef CONFIG_HOTPLUG_CPU | ||
1351 | if (additional_cpus == -1) { | 1286 | if (additional_cpus == -1) { |
1352 | if (disabled_cpus > 0) | 1287 | if (disabled_cpus > 0) |
1353 | additional_cpus = disabled_cpus; | 1288 | additional_cpus = disabled_cpus; |
1354 | else | 1289 | else |
1355 | additional_cpus = 0; | 1290 | additional_cpus = 0; |
1356 | } | 1291 | } |
1292 | #else | ||
1293 | additional_cpus = 0; | ||
1294 | #endif | ||
1357 | possible = num_processors + additional_cpus; | 1295 | possible = num_processors + additional_cpus; |
1358 | if (possible > NR_CPUS) | 1296 | if (possible > NR_CPUS) |
1359 | possible = NR_CPUS; | 1297 | possible = NR_CPUS; |
@@ -1363,18 +1301,18 @@ __init void prefill_possible_map(void) | |||
1363 | 1301 | ||
1364 | for (i = 0; i < possible; i++) | 1302 | for (i = 0; i < possible; i++) |
1365 | cpu_set(i, cpu_possible_map); | 1303 | cpu_set(i, cpu_possible_map); |
1304 | |||
1305 | nr_cpu_ids = possible; | ||
1366 | } | 1306 | } |
1367 | 1307 | ||
1368 | static void __ref remove_cpu_from_maps(int cpu) | 1308 | static void __ref remove_cpu_from_maps(int cpu) |
1369 | { | 1309 | { |
1370 | cpu_clear(cpu, cpu_online_map); | 1310 | cpu_clear(cpu, cpu_online_map); |
1371 | #ifdef CONFIG_X86_64 | ||
1372 | cpu_clear(cpu, cpu_callout_map); | 1311 | cpu_clear(cpu, cpu_callout_map); |
1373 | cpu_clear(cpu, cpu_callin_map); | 1312 | cpu_clear(cpu, cpu_callin_map); |
1374 | /* was set by cpu_init() */ | 1313 | /* was set by cpu_init() */ |
1375 | clear_bit(cpu, (unsigned long *)&cpu_initialized); | 1314 | clear_bit(cpu, (unsigned long *)&cpu_initialized); |
1376 | clear_node_cpumask(cpu); | 1315 | numa_remove_cpu(cpu); |
1377 | #endif | ||
1378 | } | 1316 | } |
1379 | 1317 | ||
1380 | int __cpu_disable(void) | 1318 | int __cpu_disable(void) |