Diffstat (limited to 'arch/i386/kernel/smpboot.c')
-rw-r--r--	arch/i386/kernel/smpboot.c | 84
1 file changed, 47 insertions(+), 37 deletions(-)
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 4bb8b77cd65b..b0f84e5778ad 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -33,6 +33,11 @@
  * Dave Jones : Report invalid combinations of Athlon CPUs.
  * Rusty Russell : Hacked into shape for new "hotplug" boot process. */
 
+
+/* SMP boot always wants to use real time delay to allow sufficient time for
+ * the APs to come online */
+#define USE_REAL_TIME_DELAY
+
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
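A note on the hunk above: the #define only has an effect because it precedes the #include block, so any header pulled in below that tests USE_REAL_TIME_DELAY (presumably the delay definitions) sees the macro already defined and selects real busy-wait delays for the slow AP bring-up. The actual header is not part of this diff; the sketch below is a purely hypothetical illustration of that compile-time switch, with invented names (example_udelay and friends).

/* hypothetical-delay.h -- illustration only, not the real <asm/delay.h> */
#ifndef _HYPOTHETICAL_DELAY_H
#define _HYPOTHETICAL_DELAY_H

#ifdef USE_REAL_TIME_DELAY
/* The includer asked for genuine busy-wait delays, e.g. while waiting
 * for application processors to come online. */
#define example_udelay(usecs)	__example_real_udelay(usecs)
#else
/* Default: the platform may patch or shortcut the delay. */
#define example_udelay(usecs)	__example_platform_udelay(usecs)
#endif

#endif /* _HYPOTHETICAL_DELAY_H */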
@@ -52,6 +57,8 @@
 #include <asm/desc.h>
 #include <asm/arch_hooks.h>
 #include <asm/nmi.h>
+#include <asm/pda.h>
+#include <asm/genapic.h>
 
 #include <mach_apic.h>
 #include <mach_wakecpu.h>
@@ -62,7 +69,7 @@ static int __devinitdata smp_b_stepping;
 
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
-#ifdef CONFIG_X86_HT
+#ifdef CONFIG_SMP
 EXPORT_SYMBOL(smp_num_siblings);
 #endif
 
@@ -536,11 +543,11 @@ set_cpu_sibling_map(int cpu)
 static void __devinit start_secondary(void *unused)
 {
 	/*
-	 * Dont put anything before smp_callin(), SMP
+	 * Don't put *anything* before secondary_cpu_init(), SMP
 	 * booting is too fragile that we want to limit the
 	 * things done here to the most necessary things.
 	 */
-	cpu_init();
+	secondary_cpu_init();
 	preempt_disable();
 	smp_callin();
 	while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
@@ -599,13 +606,16 @@ void __devinit initialize_secondary(void)
 		"movl %0,%%esp\n\t"
 		"jmp *%1"
 		:
-		:"r" (current->thread.esp),"r" (current->thread.eip));
+		:"m" (current->thread.esp),"m" (current->thread.eip));
 }
 
+/* Static state in head.S used to set up a CPU */
 extern struct {
 	void * esp;
 	unsigned short ss;
 } stack_start;
+extern struct i386_pda *start_pda;
+extern struct Xgt_desc_struct cpu_gdt_descr;
 
 #ifdef CONFIG_NUMA
 
@@ -936,9 +946,6 @@ static int __devinit do_boot_cpu(int apicid, int cpu)
 	unsigned long start_eip;
 	unsigned short nmi_high = 0, nmi_low = 0;
 
-	++cpucount;
-	alternatives_smp_switch(1);
-
 	/*
 	 * We can't use kernel_thread since we must avoid to
 	 * reschedule the child.
@@ -946,15 +953,30 @@ static int __devinit do_boot_cpu(int apicid, int cpu)
 	idle = alloc_idle_task(cpu);
 	if (IS_ERR(idle))
 		panic("failed fork for CPU %d", cpu);
+
+	/* Pre-allocate and initialize the CPU's GDT and PDA so it
+	   doesn't have to do any memory allocation during the
+	   delicate CPU-bringup phase. */
+	if (!init_gdt(cpu, idle)) {
+		printk(KERN_INFO "Couldn't allocate GDT/PDA for CPU %d\n", cpu);
+		return -1;	/* ? */
+	}
+
 	idle->thread.eip = (unsigned long) start_secondary;
 	/* start_eip had better be page-aligned! */
 	start_eip = setup_trampoline();
 
+	++cpucount;
+	alternatives_smp_switch(1);
+
 	/* So we see what's up */
 	printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
 	/* Stack for startup_32 can be just as for start_secondary onwards */
 	stack_start.esp = (void *) idle->thread.esp;
 
+	start_pda = cpu_pda(cpu);
+	cpu_gdt_descr = per_cpu(cpu_gdt_descr, cpu);
+
 	irq_ctx_init(cpu);
 
 	x86_cpu_to_apicid[cpu] = apicid;
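The two added assignments near the end of this hunk stage the AP's state: start_pda points at the new CPU's PDA, and the global cpu_gdt_descr is filled with that CPU's copy of the per-CPU GDT descriptor, the "static state in head.S" that the not-yet-running AP reads before it can address per-CPU data itself. Below is a minimal, self-contained sketch of the per_cpu() accessor used here; the names example_desc, example_gdt_descr and example_show_gdt are invented for illustration and are not part of the patch.

#include <linux/kernel.h>
#include <linux/percpu.h>

struct example_desc {			/* stand-in for a GDT descriptor */
	unsigned short size;
	unsigned long address;
};

static DEFINE_PER_CPU(struct example_desc, example_gdt_descr);

static void example_show_gdt(int cpu)
{
	/* per_cpu(var, cpu) names the instance of a per-CPU variable that
	 * belongs to 'cpu'; the hunk above copies exactly such an instance
	 * into a single global that the booting AP can reach. */
	struct example_desc d = per_cpu(example_gdt_descr, cpu);

	printk(KERN_DEBUG "CPU %d GDT at %#lx, limit %u\n",
	       cpu, d.address, d.size);
}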
@@ -1049,13 +1071,15 @@ void cpu_exit_clear(void)
 
 struct warm_boot_cpu_info {
 	struct completion *complete;
+	struct work_struct task;
 	int apicid;
 	int cpu;
 };
 
-static void __cpuinit do_warm_boot_cpu(void *p)
+static void __cpuinit do_warm_boot_cpu(struct work_struct *work)
 {
-	struct warm_boot_cpu_info *info = p;
+	struct warm_boot_cpu_info *info =
+		container_of(work, struct warm_boot_cpu_info, task);
 	do_boot_cpu(info->apicid, info->cpu);
 	complete(info->complete);
 }
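This hunk is the standard conversion to the work_struct-centred workqueue API: the handler now receives the embedded struct work_struct and recovers its containing structure with container_of(), instead of being handed a void * data pointer at INIT_WORK() time. A minimal, self-contained sketch of the same pattern, using invented names (example_boot_info, example_boot_handler) rather than the patch's own:

#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/completion.h>

struct example_boot_info {
	struct completion *complete;
	struct work_struct task;	/* embedded, like warm_boot_cpu_info.task */
	int cpu;
};

static void example_boot_handler(struct work_struct *work)
{
	/* Recover the containing structure from the embedded member. */
	struct example_boot_info *info =
		container_of(work, struct example_boot_info, task);

	printk(KERN_DEBUG "bringing up CPU %d\n", info->cpu);
	complete(info->complete);	/* wake whoever queued us */
}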
@@ -1064,7 +1088,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
 	struct warm_boot_cpu_info info;
-	struct work_struct task;
 	int apicid, ret;
 	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
 
@@ -1089,15 +1112,15 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
 	info.complete = &done;
 	info.apicid = apicid;
 	info.cpu = cpu;
-	INIT_WORK(&task, do_warm_boot_cpu, &info);
+	INIT_WORK(&info.task, do_warm_boot_cpu);
 
 	tsc_sync_disabled = 1;
 
 	/* init low mem mapping */
 	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
-			KERNEL_PGD_PTRS);
+			min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
 	flush_tlb_all();
-	schedule_work(&task);
+	schedule_work(&info.task);
 	wait_for_completion(&done);
 
 	tsc_sync_disabled = 0;
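On the submission side the separate on-stack struct work_struct disappears: the work item lives inside the info structure, INIT_WORK() takes only the handler, and schedule_work() is passed &info.task. Continuing the illustrative sketch after do_warm_boot_cpu() above (names still invented), the submit-and-wait shape looks like this; the completion keeps the stack-allocated structure alive until the handler has finished with it.

static int example_prepare_cpu(int cpu)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct example_boot_info info = { .complete = &done, .cpu = cpu };

	/* INIT_WORK() no longer carries a data pointer; the handler finds
	 * its data via container_of(), as shown earlier. */
	INIT_WORK(&info.task, example_boot_handler);
	schedule_work(&info.task);
	wait_for_completion(&done);	/* 'info' must stay valid until here */

	return 0;
}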
@@ -1108,34 +1131,15 @@ exit:
 }
 #endif
 
-static void smp_tune_scheduling (void)
+static void smp_tune_scheduling(void)
 {
 	unsigned long cachesize;	/* kB */
-	unsigned long bandwidth = 350;	/* MB/s */
-	/*
-	 * Rough estimation for SMP scheduling, this is the number of
-	 * cycles it takes for a fully memory-limited process to flush
-	 * the SMP-local cache.
-	 *
-	 * (For a P5 this pretty much means we will choose another idle
-	 * CPU almost always at wakeup time (this is due to the small
-	 * L1 cache), on PIIs it's around 50-100 usecs, depending on
-	 * the cache size)
-	 */
 
-	if (!cpu_khz) {
-		/*
-		 * this basically disables processor-affinity
-		 * scheduling on SMP without a TSC.
-		 */
-		return;
-	} else {
+	if (cpu_khz) {
 		cachesize = boot_cpu_data.x86_cache_size;
-		if (cachesize == -1) {
-			cachesize = 16;	/* Pentiums, 2x8kB cache */
-			bandwidth = 100;
-		}
-		max_cache_size = cachesize * 1024;
+
+		if (cachesize > 0)
+			max_cache_size = cachesize * 1024;
 	}
 }
 
@@ -1461,6 +1465,12 @@ int __devinit __cpu_up(unsigned int cpu)
 	cpu_set(cpu, smp_commenced_mask);
 	while (!cpu_isset(cpu, cpu_online_map))
 		cpu_relax();
+
+#ifdef CONFIG_X86_GENERICARCH
+	if (num_online_cpus() > 8 && genapic == &apic_default)
+		panic("Default flat APIC routing can't be used with > 8 cpus\n");
+#endif
+
 	return 0;
 }
 