Diffstat (limited to 'arch/i386/kernel/smpboot.c')

 arch/i386/kernel/smpboot.c | 84 +++++++++++++++++++++++++++++++++++++++++++++--------------------------------------
 1 file changed, 47 insertions(+), 37 deletions(-)
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 4bb8b77cd65b..b0f84e5778ad 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -33,6 +33,11 @@
  *	Dave Jones	:	Report invalid combinations of Athlon CPUs.
  *	Rusty Russell	:	Hacked into shape for new "hotplug" boot process. */
 
+
+/* SMP boot always wants to use real time delay to allow sufficient time for
+ * the APs to come online */
+#define USE_REAL_TIME_DELAY
+
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
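Note: USE_REAL_TIME_DELAY has to be defined before the delay headers are pulled in. The point, as the new comment says, is that SMP bringup always gets a genuine wall-clock udelay() while waiting for APs, even when a paravirt backend would otherwise shortcut it. A minimal sketch of the kind of guard this implies -- the exact <asm/delay.h> logic is an assumption, not shown in this diff:

	/* hypothetical guard; the real header may differ */
	#if defined(CONFIG_PARAVIRT) && !defined(USE_REAL_TIME_DELAY)
	#define udelay(usecs) paravirt_udelay(usecs)	/* may be patched away */
	#else
	#define udelay(usecs) __udelay(usecs)		/* real, timed delay */
	#endif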
@@ -52,6 +57,8 @@
 #include <asm/desc.h>
 #include <asm/arch_hooks.h>
 #include <asm/nmi.h>
+#include <asm/pda.h>
+#include <asm/genapic.h>
 
 #include <mach_apic.h>
 #include <mach_wakecpu.h>
@@ -62,7 +69,7 @@ static int __devinitdata smp_b_stepping;
 
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
-#ifdef CONFIG_X86_HT
+#ifdef CONFIG_SMP
 EXPORT_SYMBOL(smp_num_siblings);
 #endif
 
@@ -536,11 +543,11 @@ set_cpu_sibling_map(int cpu)
 static void __devinit start_secondary(void *unused)
 {
 	/*
-	 * Dont put anything before smp_callin(), SMP
+	 * Don't put *anything* before secondary_cpu_init(), SMP
 	 * booting is too fragile that we want to limit the
 	 * things done here to the most necessary things.
 	 */
-	cpu_init();
+	secondary_cpu_init();
 	preempt_disable();
 	smp_callin();
 	while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
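Note: cpu_init() becomes secondary_cpu_init() because, with the per-CPU GDT/PDA introduced below, an AP must not touch any per-CPU state (PDA-based current, smp_processor_id(), ...) before its own GDT is in force. The helper's body lives outside this file; a rough, hypothetical sketch of its role, with the internals hedged as assumptions:

	/* hypothetical sketch -- the real function is defined elsewhere in
	 * this patch series, not in smpboot.c */
	void __cpuinit secondary_cpu_init(void)
	{
		/* head.S already loaded the GDT/PDA that do_boot_cpu()
		 * published via cpu_gdt_descr/start_pda (see below), so the
		 * PDA segment is valid by the time we get here; go straight
		 * to the common per-CPU initialization. */
		cpu_init();
	}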
@@ -599,13 +606,16 @@ void __devinit initialize_secondary(void)
 		"movl %0,%%esp\n\t"
 		"jmp *%1"
 		:
-		:"r" (current->thread.esp),"r" (current->thread.eip));
+		:"m" (current->thread.esp),"m" (current->thread.eip));
 }
 
+/* Static state in head.S used to set up a CPU */
 extern struct {
 	void * esp;
 	unsigned short ss;
 } stack_start;
+extern struct i386_pda *start_pda;
+extern struct Xgt_desc_struct cpu_gdt_descr;
 
 #ifdef CONFIG_NUMA
 
@@ -936,9 +946,6 @@ static int __devinit do_boot_cpu(int apicid, int cpu)
 	unsigned long start_eip;
 	unsigned short nmi_high = 0, nmi_low = 0;
 
-	++cpucount;
-	alternatives_smp_switch(1);
-
 	/*
 	 * We can't use kernel_thread since we must avoid to
 	 * reschedule the child.
@@ -946,15 +953,30 @@ static int __devinit do_boot_cpu(int apicid, int cpu)
 	idle = alloc_idle_task(cpu);
 	if (IS_ERR(idle))
 		panic("failed fork for CPU %d", cpu);
+
+	/* Pre-allocate and initialize the CPU's GDT and PDA so it
+	   doesn't have to do any memory allocation during the
+	   delicate CPU-bringup phase. */
+	if (!init_gdt(cpu, idle)) {
+		printk(KERN_INFO "Couldn't allocate GDT/PDA for CPU %d\n", cpu);
+		return -1;	/* ? */
+	}
+
 	idle->thread.eip = (unsigned long) start_secondary;
 	/* start_eip had better be page-aligned! */
 	start_eip = setup_trampoline();
 
+	++cpucount;
+	alternatives_smp_switch(1);
+
 	/* So we see what's up */
 	printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
 	/* Stack for startup_32 can be just as for start_secondary onwards */
 	stack_start.esp = (void *) idle->thread.esp;
 
+	start_pda = cpu_pda(cpu);
+	cpu_gdt_descr = per_cpu(cpu_gdt_descr, cpu);
+
 	irq_ctx_init(cpu);
 
 	x86_cpu_to_apicid[cpu] = apicid;
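Note the bringup-side contract here: every allocation happens on the boot CPU, inside init_gdt(), and the results are published through two globals (start_pda and the global cpu_gdt_descr) that the trampoline/head.S path consumes before the AP can run any C. A condensed, hypothetical sketch of what init_gdt() is expected to do -- its body is outside this hunk, and the PDA field names are assumptions:

	/* hypothetical sketch of init_gdt(); details are assumptions */
	int __cpuinit init_gdt(int cpu, struct task_struct *idle)
	{
		struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
		struct desc_struct *gdt;
		struct i386_pda *pda;

		/* allocate here, on the boot CPU, so the AP itself never
		 * allocates during bringup */
		gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
		pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu));
		if (!gdt || !pda) {
			free_page((unsigned long)gdt);
			kfree(pda);
			return 0;	/* caller prints a message and bails */
		}

		memcpy(gdt, cpu_gdt_table, GDT_SIZE);	/* clone the boot GDT */
		gdt_descr->address = (unsigned long)gdt;
		gdt_descr->size = GDT_SIZE - 1;

		/* aim this GDT's PDA descriptor at the new PDA and seed the
		 * fields the AP needs first (assumed field names) */
		pda->cpu_number = cpu;
		pda->pcurrent = idle;
		cpu_pda(cpu) = pda;

		return 1;
	}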
@@ -1049,13 +1071,15 @@ void cpu_exit_clear(void)
 
 struct warm_boot_cpu_info {
 	struct completion *complete;
+	struct work_struct task;
 	int apicid;
 	int cpu;
 };
 
-static void __cpuinit do_warm_boot_cpu(void *p)
+static void __cpuinit do_warm_boot_cpu(struct work_struct *work)
 {
-	struct warm_boot_cpu_info *info = p;
+	struct warm_boot_cpu_info *info =
+		container_of(work, struct warm_boot_cpu_info, task);
 	do_boot_cpu(info->apicid, info->cpu);
 	complete(info->complete);
 }
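Note: this is the standard conversion for the 2.6.20 workqueue API change. The callback now receives the work_struct itself instead of an opaque void *, so per-invocation data travels by embedding the work_struct in the caller's own struct and walking back with container_of(). A self-contained illustration of the recovery step (generic C, not kernel code):

	#include <stddef.h>
	#include <stdio.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct work_struct { int pending; };	/* stand-in for the real one */

	struct warm_boot_cpu_info {
		struct work_struct task;	/* embedded, as in the patch */
		int apicid;
		int cpu;
	};

	/* the callback sees only the embedded member... */
	static void do_warm_boot_cpu(struct work_struct *work)
	{
		/* ...and recovers the enclosing struct from it */
		struct warm_boot_cpu_info *info =
			container_of(work, struct warm_boot_cpu_info, task);
		printf("booting cpu %d (apicid %d)\n", info->cpu, info->apicid);
	}

	int main(void)
	{
		struct warm_boot_cpu_info info = { .apicid = 1, .cpu = 1 };
		do_warm_boot_cpu(&info.task);	/* prints: booting cpu 1 (apicid 1) */
		return 0;
	}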
@@ -1064,7 +1088,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
 	struct warm_boot_cpu_info info;
-	struct work_struct task;
 	int apicid, ret;
 	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
 
@@ -1089,15 +1112,15 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
 	info.complete = &done;
 	info.apicid = apicid;
 	info.cpu = cpu;
-	INIT_WORK(&task, do_warm_boot_cpu, &info);
+	INIT_WORK(&info.task, do_warm_boot_cpu);
 
 	tsc_sync_disabled = 1;
 
 	/* init low mem mapping */
 	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
-			KERNEL_PGD_PTRS);
+			min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
 	flush_tlb_all();
-	schedule_work(&task);
+	schedule_work(&info.task);
 	wait_for_completion(&done);
 
 	tsc_sync_disabled = 0;
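Note on the min_t() clamp: clone_pgd_range(dst, src, count) copies count pgd entries, and here dst is the start (user half) of swapper_pg_dir while src is the kernel half at offset USER_PGD_PTRS. With the default 3G/1G split the clamp is a no-op, but if KERNEL_PGD_PTRS ever exceeds USER_PGD_PTRS the unclamped copy would write past the user slots into the kernel half. Worked numbers (non-PAE: PGDIR_SHIFT = 22, PTRS_PER_PGD = 1024, USER_PGD_PTRS = PAGE_OFFSET >> PGDIR_SHIFT, KERNEL_PGD_PTRS = PTRS_PER_PGD - USER_PGD_PTRS):

	/* PAGE_OFFSET = 0xC0000000 (default 3G/1G split):
	 *   USER_PGD_PTRS   = 0xC0000000 >> 22 = 768
	 *   KERNEL_PGD_PTRS = 1024 - 768       = 256
	 *   min(256, 768)   = 256  -> the clamp changes nothing
	 *
	 * PAGE_OFFSET = 0x40000000 (1G/3G split):
	 *   USER_PGD_PTRS   = 0x40000000 >> 22 = 256
	 *   KERNEL_PGD_PTRS = 1024 - 256       = 768
	 *   min(768, 256)   = 256  -> without the clamp, 768 entries would
	 *   land in a 256-entry window, overrunning into the kernel half
	 *   of swapper_pg_dir */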
@@ -1108,34 +1131,15 @@ exit:
 }
 #endif
 
-static void smp_tune_scheduling (void)
+static void smp_tune_scheduling(void)
 {
 	unsigned long cachesize;	/* kB */
-	unsigned long bandwidth = 350;	/* MB/s */
-	/*
-	 * Rough estimation for SMP scheduling, this is the number of
-	 * cycles it takes for a fully memory-limited process to flush
-	 * the SMP-local cache.
-	 *
-	 * (For a P5 this pretty much means we will choose another idle
-	 * CPU almost always at wakeup time (this is due to the small
-	 * L1 cache), on PIIs it's around 50-100 usecs, depending on
-	 * the cache size)
-	 */
 
-	if (!cpu_khz) {
-		/*
-		 * this basically disables processor-affinity
-		 * scheduling on SMP without a TSC.
-		 */
-		return;
-	} else {
+	if (cpu_khz) {
 		cachesize = boot_cpu_data.x86_cache_size;
-		if (cachesize == -1) {
-			cachesize = 16; /* Pentiums, 2x8kB cache */
-			bandwidth = 100;
-		}
-		max_cache_size = cachesize * 1024;
+
+		if (cachesize > 0)
+			max_cache_size = cachesize * 1024;
 	}
 }
 
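Note: the rewrite reduces the function to its surviving work, the kB-to-bytes conversion, guarded by the two sentinels: cpu_khz == 0 (no TSC) now simply skips the update, and the cachesize > 0 test is meant to filter the -1 "unknown" value of x86_cache_size instead of substituting the old 16 kB Pentium guess. One subtlety worth flagging: cachesize is unsigned long, so an int -1 wraps to ULONG_MAX and still passes > 0; the intended filter only bites with a signed type, e.g.:

	/* intent of the new guard, spelled out with a signed type */
	int cachesize_kb = boot_cpu_data.x86_cache_size;	/* -1 = unknown */

	if (cpu_khz && cachesize_kb > 0)
		max_cache_size = cachesize_kb * 1024;		/* kB -> bytes */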
@@ -1461,6 +1465,12 @@ int __devinit __cpu_up(unsigned int cpu)
 	cpu_set(cpu, smp_commenced_mask);
 	while (!cpu_isset(cpu, cpu_online_map))
 		cpu_relax();
+
+#ifdef CONFIG_X86_GENERICARCH
+	if (num_online_cpus() > 8 && genapic == &apic_default)
+		panic("Default flat APIC routing can't be used with > 8 cpus\n");
+#endif
+
 	return 0;
 }
 
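Note: the new sanity check reflects a hard limit of flat logical destination mode: the APIC logical destination register holds an 8-bit logical ID bitmap, one bit per CPU, so at most 8 CPUs are addressable. Past that, the generic architecture must have picked a different genapic (on i386, bigsmp-style physical routing), and falling back to apic_default is fatal. In outline:

	/* flat mode: each CPU occupies one bit of the 8-bit logical ID */
	static inline unsigned long flat_logical_id(int cpu)
	{
		/* for cpu >= 8 the bit falls outside the 8-bit LDR field,
		 * so flat routing simply cannot address that CPU */
		return 1UL << cpu;
	}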