diff options
Diffstat (limited to 'arch/x86/kernel/smpboot.c')
-rw-r--r-- | arch/x86/kernel/smpboot.c | 132 |
1 files changed, 36 insertions, 96 deletions
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index bb1a3b1fc87f..9ce666387f37 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -2,7 +2,7 @@ | |||
2 | * x86 SMP booting functions | 2 | * x86 SMP booting functions |
3 | * | 3 | * |
4 | * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> | 4 | * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> |
5 | * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com> | 5 | * (c) 1998, 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com> |
6 | * Copyright 2001 Andi Kleen, SuSE Labs. | 6 | * Copyright 2001 Andi Kleen, SuSE Labs. |
7 | * | 7 | * |
8 | * Much of the core SMP work is based on previous work by Thomas Radke, to | 8 | * Much of the core SMP work is based on previous work by Thomas Radke, to |
@@ -53,7 +53,6 @@ | |||
53 | #include <asm/nmi.h> | 53 | #include <asm/nmi.h> |
54 | #include <asm/irq.h> | 54 | #include <asm/irq.h> |
55 | #include <asm/idle.h> | 55 | #include <asm/idle.h> |
56 | #include <asm/smp.h> | ||
57 | #include <asm/trampoline.h> | 56 | #include <asm/trampoline.h> |
58 | #include <asm/cpu.h> | 57 | #include <asm/cpu.h> |
59 | #include <asm/numa.h> | 58 | #include <asm/numa.h> |
@@ -61,13 +60,12 @@ | |||
61 | #include <asm/tlbflush.h> | 60 | #include <asm/tlbflush.h> |
62 | #include <asm/mtrr.h> | 61 | #include <asm/mtrr.h> |
63 | #include <asm/vmi.h> | 62 | #include <asm/vmi.h> |
64 | #include <asm/genapic.h> | 63 | #include <asm/apic.h> |
65 | #include <asm/setup.h> | 64 | #include <asm/setup.h> |
65 | #include <asm/uv/uv.h> | ||
66 | #include <linux/mc146818rtc.h> | 66 | #include <linux/mc146818rtc.h> |
67 | 67 | ||
68 | #include <mach_apic.h> | 68 | #include <asm/smpboot_hooks.h> |
69 | #include <mach_wakecpu.h> | ||
70 | #include <smpboot_hooks.h> | ||
71 | 69 | ||
72 | #ifdef CONFIG_X86_32 | 70 | #ifdef CONFIG_X86_32 |
73 | u8 apicid_2_node[MAX_APICID]; | 71 | u8 apicid_2_node[MAX_APICID]; |
@@ -163,7 +161,7 @@ static void map_cpu_to_logical_apicid(void) | |||
163 | { | 161 | { |
164 | int cpu = smp_processor_id(); | 162 | int cpu = smp_processor_id(); |
165 | int apicid = logical_smp_processor_id(); | 163 | int apicid = logical_smp_processor_id(); |
166 | int node = apicid_to_node(apicid); | 164 | int node = apic->apicid_to_node(apicid); |
167 | 165 | ||
168 | if (!node_online(node)) | 166 | if (!node_online(node)) |
169 | node = first_online_node; | 167 | node = first_online_node; |
@@ -196,7 +194,8 @@ static void __cpuinit smp_callin(void) | |||
196 | * our local APIC. We have to wait for the IPI or we'll | 194 | * our local APIC. We have to wait for the IPI or we'll |
197 | * lock up on an APIC access. | 195 | * lock up on an APIC access. |
198 | */ | 196 | */ |
199 | wait_for_init_deassert(&init_deasserted); | 197 | if (apic->wait_for_init_deassert) |
198 | apic->wait_for_init_deassert(&init_deasserted); | ||
200 | 199 | ||
201 | /* | 200 | /* |
202 | * (This works even if the APIC is not enabled.) | 201 | * (This works even if the APIC is not enabled.) |
@@ -243,7 +242,8 @@ static void __cpuinit smp_callin(void) | |||
243 | */ | 242 | */ |
244 | 243 | ||
245 | pr_debug("CALLIN, before setup_local_APIC().\n"); | 244 | pr_debug("CALLIN, before setup_local_APIC().\n"); |
246 | smp_callin_clear_local_apic(); | 245 | if (apic->smp_callin_clear_local_apic) |
246 | apic->smp_callin_clear_local_apic(); | ||
247 | setup_local_APIC(); | 247 | setup_local_APIC(); |
248 | end_local_APIC_setup(); | 248 | end_local_APIC_setup(); |
249 | map_cpu_to_logical_apicid(); | 249 | map_cpu_to_logical_apicid(); |
@@ -583,7 +583,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) | |||
583 | /* Target chip */ | 583 | /* Target chip */ |
584 | /* Boot on the stack */ | 584 | /* Boot on the stack */ |
585 | /* Kick the second */ | 585 | /* Kick the second */ |
586 | apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid); | 586 | apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid); |
587 | 587 | ||
588 | pr_debug("Waiting for send to finish...\n"); | 588 | pr_debug("Waiting for send to finish...\n"); |
589 | send_status = safe_apic_wait_icr_idle(); | 589 | send_status = safe_apic_wait_icr_idle(); |
@@ -745,78 +745,22 @@ static void __cpuinit do_fork_idle(struct work_struct *work) | |||
745 | complete(&c_idle->done); | 745 | complete(&c_idle->done); |
746 | } | 746 | } |
747 | 747 | ||
748 | #ifdef CONFIG_X86_64 | ||
749 | |||
750 | /* __ref because it's safe to call free_bootmem when after_bootmem == 0. */ | ||
751 | static void __ref free_bootmem_pda(struct x8664_pda *oldpda) | ||
752 | { | ||
753 | if (!after_bootmem) | ||
754 | free_bootmem((unsigned long)oldpda, sizeof(*oldpda)); | ||
755 | } | ||
756 | |||
757 | /* | ||
758 | * Allocate node local memory for the AP pda. | ||
759 | * | ||
760 | * Must be called after the _cpu_pda pointer table is initialized. | ||
761 | */ | ||
762 | int __cpuinit get_local_pda(int cpu) | ||
763 | { | ||
764 | struct x8664_pda *oldpda, *newpda; | ||
765 | unsigned long size = sizeof(struct x8664_pda); | ||
766 | int node = cpu_to_node(cpu); | ||
767 | |||
768 | if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem) | ||
769 | return 0; | ||
770 | |||
771 | oldpda = cpu_pda(cpu); | ||
772 | newpda = kmalloc_node(size, GFP_ATOMIC, node); | ||
773 | if (!newpda) { | ||
774 | printk(KERN_ERR "Could not allocate node local PDA " | ||
775 | "for CPU %d on node %d\n", cpu, node); | ||
776 | |||
777 | if (oldpda) | ||
778 | return 0; /* have a usable pda */ | ||
779 | else | ||
780 | return -1; | ||
781 | } | ||
782 | |||
783 | if (oldpda) { | ||
784 | memcpy(newpda, oldpda, size); | ||
785 | free_bootmem_pda(oldpda); | ||
786 | } | ||
787 | |||
788 | newpda->in_bootmem = 0; | ||
789 | cpu_pda(cpu) = newpda; | ||
790 | return 0; | ||
791 | } | ||
792 | #endif /* CONFIG_X86_64 */ | ||
793 | |||
794 | static int __cpuinit do_boot_cpu(int apicid, int cpu) | ||
795 | /* | 748 | /* |
796 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad | 749 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad |
797 | * (ie clustered apic addressing mode), this is a LOGICAL apic ID. | 750 | * (ie clustered apic addressing mode), this is a LOGICAL apic ID. |
798 | * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu. | 751 | * Returns zero if CPU booted OK, else error code from ->wakeup_cpu. |
799 | */ | 752 | */ |
753 | static int __cpuinit do_boot_cpu(int apicid, int cpu) | ||
800 | { | 754 | { |
801 | unsigned long boot_error = 0; | 755 | unsigned long boot_error = 0; |
802 | int timeout; | ||
803 | unsigned long start_ip; | 756 | unsigned long start_ip; |
804 | unsigned short nmi_high = 0, nmi_low = 0; | 757 | int timeout; |
805 | struct create_idle c_idle = { | 758 | struct create_idle c_idle = { |
806 | .cpu = cpu, | 759 | .cpu = cpu, |
807 | .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), | 760 | .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), |
808 | }; | 761 | }; |
809 | INIT_WORK(&c_idle.work, do_fork_idle); | ||
810 | 762 | ||
811 | #ifdef CONFIG_X86_64 | 763 | INIT_WORK(&c_idle.work, do_fork_idle); |
812 | /* Allocate node local memory for AP pdas */ | ||
813 | if (cpu > 0) { | ||
814 | boot_error = get_local_pda(cpu); | ||
815 | if (boot_error) | ||
816 | goto restore_state; | ||
817 | /* if can't get pda memory, can't start cpu */ | ||
818 | } | ||
819 | #endif | ||
820 | 764 | ||
821 | alternatives_smp_switch(1); | 765 | alternatives_smp_switch(1); |
822 | 766 | ||
@@ -847,14 +791,16 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) | |||
847 | 791 | ||
848 | set_idle_for_cpu(cpu, c_idle.idle); | 792 | set_idle_for_cpu(cpu, c_idle.idle); |
849 | do_rest: | 793 | do_rest: |
850 | #ifdef CONFIG_X86_32 | ||
851 | per_cpu(current_task, cpu) = c_idle.idle; | 794 | per_cpu(current_task, cpu) = c_idle.idle; |
852 | init_gdt(cpu); | 795 | #ifdef CONFIG_X86_32 |
853 | /* Stack for startup_32 can be just as for start_secondary onwards */ | 796 | /* Stack for startup_32 can be just as for start_secondary onwards */ |
854 | irq_ctx_init(cpu); | 797 | irq_ctx_init(cpu); |
855 | #else | 798 | #else |
856 | cpu_pda(cpu)->pcurrent = c_idle.idle; | ||
857 | clear_tsk_thread_flag(c_idle.idle, TIF_FORK); | 799 | clear_tsk_thread_flag(c_idle.idle, TIF_FORK); |
800 | initial_gs = per_cpu_offset(cpu); | ||
801 | per_cpu(kernel_stack, cpu) = | ||
802 | (unsigned long)task_stack_page(c_idle.idle) - | ||
803 | KERNEL_STACK_OFFSET + THREAD_SIZE; | ||
858 | #endif | 804 | #endif |
859 | early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); | 805 | early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); |
860 | initial_code = (unsigned long)start_secondary; | 806 | initial_code = (unsigned long)start_secondary; |
@@ -878,8 +824,6 @@ do_rest: | |||
878 | 824 | ||
879 | pr_debug("Setting warm reset code and vector.\n"); | 825 | pr_debug("Setting warm reset code and vector.\n"); |
880 | 826 | ||
881 | store_NMI_vector(&nmi_high, &nmi_low); | ||
882 | |||
883 | smpboot_setup_warm_reset_vector(start_ip); | 827 | smpboot_setup_warm_reset_vector(start_ip); |
884 | /* | 828 | /* |
885 | * Be paranoid about clearing APIC errors. | 829 | * Be paranoid about clearing APIC errors. |
@@ -893,7 +837,7 @@ do_rest: | |||
893 | /* | 837 | /* |
894 | * Starting actual IPI sequence... | 838 | * Starting actual IPI sequence... |
895 | */ | 839 | */ |
896 | boot_error = wakeup_secondary_cpu(apicid, start_ip); | 840 | boot_error = apic->wakeup_cpu(apicid, start_ip); |
897 | 841 | ||
898 | if (!boot_error) { | 842 | if (!boot_error) { |
899 | /* | 843 | /* |
@@ -927,13 +871,11 @@ do_rest: | |||
927 | else | 871 | else |
928 | /* trampoline code not run */ | 872 | /* trampoline code not run */ |
929 | printk(KERN_ERR "Not responding.\n"); | 873 | printk(KERN_ERR "Not responding.\n"); |
930 | if (get_uv_system_type() != UV_NON_UNIQUE_APIC) | 874 | if (apic->inquire_remote_apic) |
931 | inquire_remote_apic(apicid); | 875 | apic->inquire_remote_apic(apicid); |
932 | } | 876 | } |
933 | } | 877 | } |
934 | #ifdef CONFIG_X86_64 | 878 | |
935 | restore_state: | ||
936 | #endif | ||
937 | if (boot_error) { | 879 | if (boot_error) { |
938 | /* Try to put things back the way they were before ... */ | 880 | /* Try to put things back the way they were before ... */ |
939 | numa_remove_cpu(cpu); /* was set by numa_add_cpu */ | 881 | numa_remove_cpu(cpu); /* was set by numa_add_cpu */ |
@@ -961,7 +903,7 @@ restore_state: | |||
961 | 903 | ||
962 | int __cpuinit native_cpu_up(unsigned int cpu) | 904 | int __cpuinit native_cpu_up(unsigned int cpu) |
963 | { | 905 | { |
964 | int apicid = cpu_present_to_apicid(cpu); | 906 | int apicid = apic->cpu_present_to_apicid(cpu); |
965 | unsigned long flags; | 907 | unsigned long flags; |
966 | int err; | 908 | int err; |
967 | 909 | ||
@@ -1054,14 +996,14 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
1054 | { | 996 | { |
1055 | preempt_disable(); | 997 | preempt_disable(); |
1056 | 998 | ||
1057 | #if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) | 999 | #if !defined(CONFIG_X86_BIGSMP) && defined(CONFIG_X86_32) |
1058 | if (def_to_bigsmp && nr_cpu_ids > 8) { | 1000 | if (def_to_bigsmp && nr_cpu_ids > 8) { |
1059 | unsigned int cpu; | 1001 | unsigned int cpu; |
1060 | unsigned nr; | 1002 | unsigned nr; |
1061 | 1003 | ||
1062 | printk(KERN_WARNING | 1004 | printk(KERN_WARNING |
1063 | "More than 8 CPUs detected - skipping them.\n" | 1005 | "More than 8 CPUs detected - skipping them.\n" |
1064 | "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n"); | 1006 | "Use CONFIG_X86_BIGSMP.\n"); |
1065 | 1007 | ||
1066 | nr = 0; | 1008 | nr = 0; |
1067 | for_each_present_cpu(cpu) { | 1009 | for_each_present_cpu(cpu) { |
@@ -1107,7 +1049,7 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
1107 | * Should not be necessary because the MP table should list the boot | 1049 | * Should not be necessary because the MP table should list the boot |
1108 | * CPU too, but we do it for the sake of robustness anyway. | 1050 | * CPU too, but we do it for the sake of robustness anyway. |
1109 | */ | 1051 | */ |
1110 | if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { | 1052 | if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) { |
1111 | printk(KERN_NOTICE | 1053 | printk(KERN_NOTICE |
1112 | "weird, boot CPU (#%d) not listed by the BIOS.\n", | 1054 | "weird, boot CPU (#%d) not listed by the BIOS.\n", |
1113 | boot_cpu_physical_apicid); | 1055 | boot_cpu_physical_apicid); |
@@ -1125,6 +1067,7 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
1125 | printk(KERN_ERR "... forcing use of dummy APIC emulation." | 1067 | printk(KERN_ERR "... forcing use of dummy APIC emulation." |
1126 | "(tell your hw vendor)\n"); | 1068 | "(tell your hw vendor)\n"); |
1127 | smpboot_clear_io_apic(); | 1069 | smpboot_clear_io_apic(); |
1070 | arch_disable_smp_support(); | ||
1128 | return -1; | 1071 | return -1; |
1129 | } | 1072 | } |
1130 | 1073 | ||
@@ -1181,9 +1124,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1181 | current_thread_info()->cpu = 0; /* needed? */ | 1124 | current_thread_info()->cpu = 0; /* needed? */ |
1182 | set_cpu_sibling_map(0); | 1125 | set_cpu_sibling_map(0); |
1183 | 1126 | ||
1184 | #ifdef CONFIG_X86_64 | ||
1185 | enable_IR_x2apic(); | 1127 | enable_IR_x2apic(); |
1186 | setup_apic_routing(); | 1128 | #ifdef CONFIG_X86_64 |
1129 | default_setup_apic_routing(); | ||
1187 | #endif | 1130 | #endif |
1188 | 1131 | ||
1189 | if (smp_sanity_check(max_cpus) < 0) { | 1132 | if (smp_sanity_check(max_cpus) < 0) { |
@@ -1207,18 +1150,18 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1207 | */ | 1150 | */ |
1208 | setup_local_APIC(); | 1151 | setup_local_APIC(); |
1209 | 1152 | ||
1210 | #ifdef CONFIG_X86_64 | ||
1211 | /* | 1153 | /* |
1212 | * Enable IO APIC before setting up error vector | 1154 | * Enable IO APIC before setting up error vector |
1213 | */ | 1155 | */ |
1214 | if (!skip_ioapic_setup && nr_ioapics) | 1156 | if (!skip_ioapic_setup && nr_ioapics) |
1215 | enable_IO_APIC(); | 1157 | enable_IO_APIC(); |
1216 | #endif | 1158 | |
1217 | end_local_APIC_setup(); | 1159 | end_local_APIC_setup(); |
1218 | 1160 | ||
1219 | map_cpu_to_logical_apicid(); | 1161 | map_cpu_to_logical_apicid(); |
1220 | 1162 | ||
1221 | setup_portio_remap(); | 1163 | if (apic->setup_portio_remap) |
1164 | apic->setup_portio_remap(); | ||
1222 | 1165 | ||
1223 | smpboot_setup_io_apic(); | 1166 | smpboot_setup_io_apic(); |
1224 | /* | 1167 | /* |
@@ -1240,10 +1183,7 @@ out: | |||
1240 | void __init native_smp_prepare_boot_cpu(void) | 1183 | void __init native_smp_prepare_boot_cpu(void) |
1241 | { | 1184 | { |
1242 | int me = smp_processor_id(); | 1185 | int me = smp_processor_id(); |
1243 | #ifdef CONFIG_X86_32 | 1186 | switch_to_new_gdt(me); |
1244 | init_gdt(me); | ||
1245 | #endif | ||
1246 | switch_to_new_gdt(); | ||
1247 | /* already set me in cpu_online_mask in boot_cpu_init() */ | 1187 | /* already set me in cpu_online_mask in boot_cpu_init() */ |
1248 | cpumask_set_cpu(me, cpu_callout_mask); | 1188 | cpumask_set_cpu(me, cpu_callout_mask); |
1249 | per_cpu(cpu_state, me) = CPU_ONLINE; | 1189 | per_cpu(cpu_state, me) = CPU_ONLINE; |