diff options
Diffstat (limited to 'arch/i386/kernel/smpboot.c')
-rw-r--r-- | arch/i386/kernel/smpboot.c | 98 |
1 files changed, 53 insertions, 45 deletions
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 4bb8b77cd65b..dea7ef9d3e82 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -33,6 +33,11 @@ | |||
33 | * Dave Jones : Report invalid combinations of Athlon CPUs. | 33 | * Dave Jones : Report invalid combinations of Athlon CPUs. |
34 | * Rusty Russell : Hacked into shape for new "hotplug" boot process. */ | 34 | * Rusty Russell : Hacked into shape for new "hotplug" boot process. */ |
35 | 35 | ||
36 | |||
37 | /* SMP boot always wants to use real time delay to allow sufficient time for | ||
38 | * the APs to come online */ | ||
39 | #define USE_REAL_TIME_DELAY | ||
40 | |||
36 | #include <linux/module.h> | 41 | #include <linux/module.h> |
37 | #include <linux/init.h> | 42 | #include <linux/init.h> |
38 | #include <linux/kernel.h> | 43 | #include <linux/kernel.h> |
@@ -52,6 +57,8 @@ | |||
52 | #include <asm/desc.h> | 57 | #include <asm/desc.h> |
53 | #include <asm/arch_hooks.h> | 58 | #include <asm/arch_hooks.h> |
54 | #include <asm/nmi.h> | 59 | #include <asm/nmi.h> |
60 | #include <asm/pda.h> | ||
61 | #include <asm/genapic.h> | ||
55 | 62 | ||
56 | #include <mach_apic.h> | 63 | #include <mach_apic.h> |
57 | #include <mach_wakecpu.h> | 64 | #include <mach_wakecpu.h> |
@@ -62,9 +69,7 @@ static int __devinitdata smp_b_stepping; | |||
62 | 69 | ||
63 | /* Number of siblings per CPU package */ | 70 | /* Number of siblings per CPU package */ |
64 | int smp_num_siblings = 1; | 71 | int smp_num_siblings = 1; |
65 | #ifdef CONFIG_X86_HT | ||
66 | EXPORT_SYMBOL(smp_num_siblings); | 72 | EXPORT_SYMBOL(smp_num_siblings); |
67 | #endif | ||
68 | 73 | ||
69 | /* Last level cache ID of each logical CPU */ | 74 | /* Last level cache ID of each logical CPU */ |
70 | int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; | 75 | int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; |
@@ -154,7 +159,7 @@ void __init smp_alloc_memory(void) | |||
154 | * a given CPU | 159 | * a given CPU |
155 | */ | 160 | */ |
156 | 161 | ||
157 | static void __devinit smp_store_cpu_info(int id) | 162 | static void __cpuinit smp_store_cpu_info(int id) |
158 | { | 163 | { |
159 | struct cpuinfo_x86 *c = cpu_data + id; | 164 | struct cpuinfo_x86 *c = cpu_data + id; |
160 | 165 | ||
@@ -222,7 +227,7 @@ static struct { | |||
222 | atomic_t count_start; | 227 | atomic_t count_start; |
223 | atomic_t count_stop; | 228 | atomic_t count_stop; |
224 | unsigned long long values[NR_CPUS]; | 229 | unsigned long long values[NR_CPUS]; |
225 | } tsc __initdata = { | 230 | } tsc __cpuinitdata = { |
226 | .start_flag = ATOMIC_INIT(0), | 231 | .start_flag = ATOMIC_INIT(0), |
227 | .count_start = ATOMIC_INIT(0), | 232 | .count_start = ATOMIC_INIT(0), |
228 | .count_stop = ATOMIC_INIT(0), | 233 | .count_stop = ATOMIC_INIT(0), |
@@ -327,7 +332,7 @@ static void __init synchronize_tsc_bp(void) | |||
327 | printk("passed.\n"); | 332 | printk("passed.\n"); |
328 | } | 333 | } |
329 | 334 | ||
330 | static void __init synchronize_tsc_ap(void) | 335 | static void __cpuinit synchronize_tsc_ap(void) |
331 | { | 336 | { |
332 | int i; | 337 | int i; |
333 | 338 | ||
@@ -359,7 +364,7 @@ extern void calibrate_delay(void); | |||
359 | 364 | ||
360 | static atomic_t init_deasserted; | 365 | static atomic_t init_deasserted; |
361 | 366 | ||
362 | static void __devinit smp_callin(void) | 367 | static void __cpuinit smp_callin(void) |
363 | { | 368 | { |
364 | int cpuid, phys_id; | 369 | int cpuid, phys_id; |
365 | unsigned long timeout; | 370 | unsigned long timeout; |
@@ -533,14 +538,14 @@ set_cpu_sibling_map(int cpu) | |||
533 | /* | 538 | /* |
534 | * Activate a secondary processor. | 539 | * Activate a secondary processor. |
535 | */ | 540 | */ |
536 | static void __devinit start_secondary(void *unused) | 541 | static void __cpuinit start_secondary(void *unused) |
537 | { | 542 | { |
538 | /* | 543 | /* |
539 | * Dont put anything before smp_callin(), SMP | 544 | * Don't put *anything* before secondary_cpu_init(), SMP |
540 | * booting is too fragile that we want to limit the | 545 | * booting is too fragile that we want to limit the |
541 | * things done here to the most necessary things. | 546 | * things done here to the most necessary things. |
542 | */ | 547 | */ |
543 | cpu_init(); | 548 | secondary_cpu_init(); |
544 | preempt_disable(); | 549 | preempt_disable(); |
545 | smp_callin(); | 550 | smp_callin(); |
546 | while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) | 551 | while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) |
@@ -599,13 +604,16 @@ void __devinit initialize_secondary(void) | |||
599 | "movl %0,%%esp\n\t" | 604 | "movl %0,%%esp\n\t" |
600 | "jmp *%1" | 605 | "jmp *%1" |
601 | : | 606 | : |
602 | :"r" (current->thread.esp),"r" (current->thread.eip)); | 607 | :"m" (current->thread.esp),"m" (current->thread.eip)); |
603 | } | 608 | } |
604 | 609 | ||
610 | /* Static state in head.S used to set up a CPU */ | ||
605 | extern struct { | 611 | extern struct { |
606 | void * esp; | 612 | void * esp; |
607 | unsigned short ss; | 613 | unsigned short ss; |
608 | } stack_start; | 614 | } stack_start; |
615 | extern struct i386_pda *start_pda; | ||
616 | extern struct Xgt_desc_struct cpu_gdt_descr; | ||
609 | 617 | ||
610 | #ifdef CONFIG_NUMA | 618 | #ifdef CONFIG_NUMA |
611 | 619 | ||
@@ -923,7 +931,7 @@ static inline struct task_struct * alloc_idle_task(int cpu) | |||
923 | #define alloc_idle_task(cpu) fork_idle(cpu) | 931 | #define alloc_idle_task(cpu) fork_idle(cpu) |
924 | #endif | 932 | #endif |
925 | 933 | ||
926 | static int __devinit do_boot_cpu(int apicid, int cpu) | 934 | static int __cpuinit do_boot_cpu(int apicid, int cpu) |
927 | /* | 935 | /* |
928 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad | 936 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad |
929 | * (ie clustered apic addressing mode), this is a LOGICAL apic ID. | 937 | * (ie clustered apic addressing mode), this is a LOGICAL apic ID. |
@@ -936,9 +944,6 @@ static int __devinit do_boot_cpu(int apicid, int cpu) | |||
936 | unsigned long start_eip; | 944 | unsigned long start_eip; |
937 | unsigned short nmi_high = 0, nmi_low = 0; | 945 | unsigned short nmi_high = 0, nmi_low = 0; |
938 | 946 | ||
939 | ++cpucount; | ||
940 | alternatives_smp_switch(1); | ||
941 | |||
942 | /* | 947 | /* |
943 | * We can't use kernel_thread since we must avoid to | 948 | * We can't use kernel_thread since we must avoid to |
944 | * reschedule the child. | 949 | * reschedule the child. |
@@ -946,15 +951,30 @@ static int __devinit do_boot_cpu(int apicid, int cpu) | |||
946 | idle = alloc_idle_task(cpu); | 951 | idle = alloc_idle_task(cpu); |
947 | if (IS_ERR(idle)) | 952 | if (IS_ERR(idle)) |
948 | panic("failed fork for CPU %d", cpu); | 953 | panic("failed fork for CPU %d", cpu); |
954 | |||
955 | /* Pre-allocate and initialize the CPU's GDT and PDA so it | ||
956 | doesn't have to do any memory allocation during the | ||
957 | delicate CPU-bringup phase. */ | ||
958 | if (!init_gdt(cpu, idle)) { | ||
959 | printk(KERN_INFO "Couldn't allocate GDT/PDA for CPU %d\n", cpu); | ||
960 | return -1; /* ? */ | ||
961 | } | ||
962 | |||
949 | idle->thread.eip = (unsigned long) start_secondary; | 963 | idle->thread.eip = (unsigned long) start_secondary; |
950 | /* start_eip had better be page-aligned! */ | 964 | /* start_eip had better be page-aligned! */ |
951 | start_eip = setup_trampoline(); | 965 | start_eip = setup_trampoline(); |
952 | 966 | ||
967 | ++cpucount; | ||
968 | alternatives_smp_switch(1); | ||
969 | |||
953 | /* So we see what's up */ | 970 | /* So we see what's up */ |
954 | printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); | 971 | printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip); |
955 | /* Stack for startup_32 can be just as for start_secondary onwards */ | 972 | /* Stack for startup_32 can be just as for start_secondary onwards */ |
956 | stack_start.esp = (void *) idle->thread.esp; | 973 | stack_start.esp = (void *) idle->thread.esp; |
957 | 974 | ||
975 | start_pda = cpu_pda(cpu); | ||
976 | cpu_gdt_descr = per_cpu(cpu_gdt_descr, cpu); | ||
977 | |||
958 | irq_ctx_init(cpu); | 978 | irq_ctx_init(cpu); |
959 | 979 | ||
960 | x86_cpu_to_apicid[cpu] = apicid; | 980 | x86_cpu_to_apicid[cpu] = apicid; |
@@ -1049,13 +1069,15 @@ void cpu_exit_clear(void) | |||
1049 | 1069 | ||
1050 | struct warm_boot_cpu_info { | 1070 | struct warm_boot_cpu_info { |
1051 | struct completion *complete; | 1071 | struct completion *complete; |
1072 | struct work_struct task; | ||
1052 | int apicid; | 1073 | int apicid; |
1053 | int cpu; | 1074 | int cpu; |
1054 | }; | 1075 | }; |
1055 | 1076 | ||
1056 | static void __cpuinit do_warm_boot_cpu(void *p) | 1077 | static void __cpuinit do_warm_boot_cpu(struct work_struct *work) |
1057 | { | 1078 | { |
1058 | struct warm_boot_cpu_info *info = p; | 1079 | struct warm_boot_cpu_info *info = |
1080 | container_of(work, struct warm_boot_cpu_info, task); | ||
1059 | do_boot_cpu(info->apicid, info->cpu); | 1081 | do_boot_cpu(info->apicid, info->cpu); |
1060 | complete(info->complete); | 1082 | complete(info->complete); |
1061 | } | 1083 | } |
@@ -1064,7 +1086,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu) | |||
1064 | { | 1086 | { |
1065 | DECLARE_COMPLETION_ONSTACK(done); | 1087 | DECLARE_COMPLETION_ONSTACK(done); |
1066 | struct warm_boot_cpu_info info; | 1088 | struct warm_boot_cpu_info info; |
1067 | struct work_struct task; | ||
1068 | int apicid, ret; | 1089 | int apicid, ret; |
1069 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | 1090 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); |
1070 | 1091 | ||
@@ -1089,15 +1110,15 @@ static int __cpuinit __smp_prepare_cpu(int cpu) | |||
1089 | info.complete = &done; | 1110 | info.complete = &done; |
1090 | info.apicid = apicid; | 1111 | info.apicid = apicid; |
1091 | info.cpu = cpu; | 1112 | info.cpu = cpu; |
1092 | INIT_WORK(&task, do_warm_boot_cpu, &info); | 1113 | INIT_WORK(&info.task, do_warm_boot_cpu); |
1093 | 1114 | ||
1094 | tsc_sync_disabled = 1; | 1115 | tsc_sync_disabled = 1; |
1095 | 1116 | ||
1096 | /* init low mem mapping */ | 1117 | /* init low mem mapping */ |
1097 | clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, | 1118 | clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, |
1098 | KERNEL_PGD_PTRS); | 1119 | min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); |
1099 | flush_tlb_all(); | 1120 | flush_tlb_all(); |
1100 | schedule_work(&task); | 1121 | schedule_work(&info.task); |
1101 | wait_for_completion(&done); | 1122 | wait_for_completion(&done); |
1102 | 1123 | ||
1103 | tsc_sync_disabled = 0; | 1124 | tsc_sync_disabled = 0; |
@@ -1108,34 +1129,15 @@ exit: | |||
1108 | } | 1129 | } |
1109 | #endif | 1130 | #endif |
1110 | 1131 | ||
1111 | static void smp_tune_scheduling (void) | 1132 | static void smp_tune_scheduling(void) |
1112 | { | 1133 | { |
1113 | unsigned long cachesize; /* kB */ | 1134 | unsigned long cachesize; /* kB */ |
1114 | unsigned long bandwidth = 350; /* MB/s */ | ||
1115 | /* | ||
1116 | * Rough estimation for SMP scheduling, this is the number of | ||
1117 | * cycles it takes for a fully memory-limited process to flush | ||
1118 | * the SMP-local cache. | ||
1119 | * | ||
1120 | * (For a P5 this pretty much means we will choose another idle | ||
1121 | * CPU almost always at wakeup time (this is due to the small | ||
1122 | * L1 cache), on PIIs it's around 50-100 usecs, depending on | ||
1123 | * the cache size) | ||
1124 | */ | ||
1125 | 1135 | ||
1126 | if (!cpu_khz) { | 1136 | if (cpu_khz) { |
1127 | /* | ||
1128 | * this basically disables processor-affinity | ||
1129 | * scheduling on SMP without a TSC. | ||
1130 | */ | ||
1131 | return; | ||
1132 | } else { | ||
1133 | cachesize = boot_cpu_data.x86_cache_size; | 1137 | cachesize = boot_cpu_data.x86_cache_size; |
1134 | if (cachesize == -1) { | 1138 | |
1135 | cachesize = 16; /* Pentiums, 2x8kB cache */ | 1139 | if (cachesize > 0) |
1136 | bandwidth = 100; | 1140 | max_cache_size = cachesize * 1024; |
1137 | } | ||
1138 | max_cache_size = cachesize * 1024; | ||
1139 | } | 1141 | } |
1140 | } | 1142 | } |
1141 | 1143 | ||
@@ -1430,7 +1432,7 @@ void __cpu_die(unsigned int cpu) | |||
1430 | } | 1432 | } |
1431 | #endif /* CONFIG_HOTPLUG_CPU */ | 1433 | #endif /* CONFIG_HOTPLUG_CPU */ |
1432 | 1434 | ||
1433 | int __devinit __cpu_up(unsigned int cpu) | 1435 | int __cpuinit __cpu_up(unsigned int cpu) |
1434 | { | 1436 | { |
1435 | #ifdef CONFIG_HOTPLUG_CPU | 1437 | #ifdef CONFIG_HOTPLUG_CPU |
1436 | int ret=0; | 1438 | int ret=0; |
@@ -1461,6 +1463,12 @@ int __devinit __cpu_up(unsigned int cpu) | |||
1461 | cpu_set(cpu, smp_commenced_mask); | 1463 | cpu_set(cpu, smp_commenced_mask); |
1462 | while (!cpu_isset(cpu, cpu_online_map)) | 1464 | while (!cpu_isset(cpu, cpu_online_map)) |
1463 | cpu_relax(); | 1465 | cpu_relax(); |
1466 | |||
1467 | #ifdef CONFIG_X86_GENERICARCH | ||
1468 | if (num_online_cpus() > 8 && genapic == &apic_default) | ||
1469 | panic("Default flat APIC routing can't be used with > 8 cpus\n"); | ||
1470 | #endif | ||
1471 | |||
1464 | return 0; | 1472 | return 0; |
1465 | } | 1473 | } |
1466 | 1474 | ||