Diffstat (limited to 'arch/i386/kernel/smpboot.c')
-rw-r--r--  arch/i386/kernel/smpboot.c | 98
1 file changed, 53 insertions(+), 45 deletions(-)
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 4bb8b77cd65b..dea7ef9d3e82 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -33,6 +33,11 @@
  *	Dave Jones	: Report invalid combinations of Athlon CPUs.
  *	Rusty Russell	: Hacked into shape for new "hotplug" boot process. */
 
+
+/* SMP boot always wants to use real time delay to allow sufficient time for
+ * the APs to come online */
+#define USE_REAL_TIME_DELAY
+
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -52,6 +57,8 @@
 #include <asm/desc.h>
 #include <asm/arch_hooks.h>
 #include <asm/nmi.h>
+#include <asm/pda.h>
+#include <asm/genapic.h>
 
 #include <mach_apic.h>
 #include <mach_wakecpu.h>
@@ -62,9 +69,7 @@ static int __devinitdata smp_b_stepping;
 
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
-#ifdef CONFIG_X86_HT
 EXPORT_SYMBOL(smp_num_siblings);
-#endif
 
 /* Last level cache ID of each logical CPU */
 int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};
@@ -154,7 +159,7 @@ void __init smp_alloc_memory(void)
  * a given CPU
  */
 
-static void __devinit smp_store_cpu_info(int id)
+static void __cpuinit smp_store_cpu_info(int id)
 {
 	struct cpuinfo_x86 *c = cpu_data + id;
 
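
Note: the __devinit -> __cpuinit conversions in this patch (here and in the
hunks below) tie the lifetime of the CPU-bringup code to CPU hotplug rather
than device hotplug. A minimal sketch of how these annotations expanded in
the <linux/init.h> of this era (simplified; section-placement details
omitted):

	/* Sketch: with CPU hotplug the bringup text must stay resident;
	 * without it, it can be discarded with the rest of __init. */
	#ifdef CONFIG_HOTPLUG_CPU
	#define __cpuinit
	#else
	#define __cpuinit	__init
	#endif
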
@@ -222,7 +227,7 @@ static struct {
 	atomic_t count_start;
 	atomic_t count_stop;
 	unsigned long long values[NR_CPUS];
-} tsc __initdata = {
+} tsc __cpuinitdata = {
 	.start_flag = ATOMIC_INIT(0),
 	.count_start = ATOMIC_INIT(0),
 	.count_stop = ATOMIC_INIT(0),
@@ -327,7 +332,7 @@ static void __init synchronize_tsc_bp(void)
 		printk("passed.\n");
 }
 
-static void __init synchronize_tsc_ap(void)
+static void __cpuinit synchronize_tsc_ap(void)
 {
 	int i;
 
@@ -359,7 +364,7 @@ extern void calibrate_delay(void);
 
 static atomic_t init_deasserted;
 
-static void __devinit smp_callin(void)
+static void __cpuinit smp_callin(void)
 {
 	int cpuid, phys_id;
 	unsigned long timeout;
@@ -533,14 +538,14 @@ set_cpu_sibling_map(int cpu)
 /*
  * Activate a secondary processor.
  */
-static void __devinit start_secondary(void *unused)
+static void __cpuinit start_secondary(void *unused)
 {
 	/*
-	 * Dont put anything before smp_callin(), SMP
+	 * Don't put *anything* before secondary_cpu_init(), SMP
 	 * booting is too fragile that we want to limit the
 	 * things done here to the most necessary things.
 	 */
-	cpu_init();
+	secondary_cpu_init();
 	preempt_disable();
 	smp_callin();
 	while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
@@ -599,13 +604,16 @@ void __devinit initialize_secondary(void)
599 "movl %0,%%esp\n\t" 604 "movl %0,%%esp\n\t"
600 "jmp *%1" 605 "jmp *%1"
601 : 606 :
602 :"r" (current->thread.esp),"r" (current->thread.eip)); 607 :"m" (current->thread.esp),"m" (current->thread.eip));
603} 608}
604 609
610/* Static state in head.S used to set up a CPU */
605extern struct { 611extern struct {
606 void * esp; 612 void * esp;
607 unsigned short ss; 613 unsigned short ss;
608} stack_start; 614} stack_start;
615extern struct i386_pda *start_pda;
616extern struct Xgt_desc_struct cpu_gdt_descr;
609 617
610#ifdef CONFIG_NUMA 618#ifdef CONFIG_NUMA
611 619
@@ -923,7 +931,7 @@ static inline struct task_struct * alloc_idle_task(int cpu)
 #define alloc_idle_task(cpu) fork_idle(cpu)
 #endif
 
-static int __devinit do_boot_cpu(int apicid, int cpu)
+static int __cpuinit do_boot_cpu(int apicid, int cpu)
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
  * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -936,9 +944,6 @@ static int __devinit do_boot_cpu(int apicid, int cpu)
 	unsigned long start_eip;
 	unsigned short nmi_high = 0, nmi_low = 0;
 
-	++cpucount;
-	alternatives_smp_switch(1);
-
 	/*
 	 * We can't use kernel_thread since we must avoid to
 	 * reschedule the child.
@@ -946,15 +951,30 @@ static int __devinit do_boot_cpu(int apicid, int cpu)
 	idle = alloc_idle_task(cpu);
 	if (IS_ERR(idle))
 		panic("failed fork for CPU %d", cpu);
+
+	/* Pre-allocate and initialize the CPU's GDT and PDA so it
+	   doesn't have to do any memory allocation during the
+	   delicate CPU-bringup phase. */
+	if (!init_gdt(cpu, idle)) {
+		printk(KERN_INFO "Couldn't allocate GDT/PDA for CPU %d\n", cpu);
+		return -1;	/* ? */
+	}
+
 	idle->thread.eip = (unsigned long) start_secondary;
 	/* start_eip had better be page-aligned! */
 	start_eip = setup_trampoline();
 
+	++cpucount;
+	alternatives_smp_switch(1);
+
 	/* So we see what's up */
 	printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
 	/* Stack for startup_32 can be just as for start_secondary onwards */
 	stack_start.esp = (void *) idle->thread.esp;
 
+	start_pda = cpu_pda(cpu);
+	cpu_gdt_descr = per_cpu(cpu_gdt_descr, cpu);
+
 	irq_ctx_init(cpu);
 
 	x86_cpu_to_apicid[cpu] = apicid;
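
Note: init_gdt() moves all GDT/PDA allocation for the AP onto the boot CPU,
before the startup IPI is sent, so the AP itself allocates no memory during
bringup. The ++cpucount and alternatives_smp_switch(1) calls now sit below the
allocation, so an early failure commits no state, roughly:

	/* Ordering restated (illustrative, mirrors the hunk above):
	 * commit shared state only after the last failure point. */
	if (!init_gdt(cpu, idle))	/* can fail: nothing committed yet */
		return -1;
	...
	++cpucount;			/* past the failure point */
	alternatives_smp_switch(1);

The start_pda/cpu_gdt_descr assignments stage this CPU's values in the static
variables that head.S consumes (see the "Static state in head.S" comment in
the earlier hunk) before the AP has a GDT or %gs of its own.
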
@@ -1049,13 +1069,15 @@ void cpu_exit_clear(void)
 
 struct warm_boot_cpu_info {
 	struct completion *complete;
+	struct work_struct task;
 	int apicid;
 	int cpu;
 };
 
-static void __cpuinit do_warm_boot_cpu(void *p)
+static void __cpuinit do_warm_boot_cpu(struct work_struct *work)
 {
-	struct warm_boot_cpu_info *info = p;
+	struct warm_boot_cpu_info *info =
+		container_of(work, struct warm_boot_cpu_info, task);
 	do_boot_cpu(info->apicid, info->cpu);
 	complete(info->complete);
 }
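
Note: do_warm_boot_cpu() is converted to the 2.6.20-style workqueue API, in
which the handler receives the work_struct itself rather than a void * cookie;
per-work context is reached by embedding the work_struct in its containing
structure and recovering it with container_of(). A standalone userspace sketch
of the pattern (stand-in types, not the kernel's definitions):

	#include <stddef.h>
	#include <stdio.h>

	struct work_struct { int pending; };	/* stand-in only */
	struct completion;			/* opaque, pointer member only */

	/* container_of() as in the kernel, simplified */
	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct warm_boot_info {			/* mirrors warm_boot_cpu_info */
		struct completion *complete;
		struct work_struct task;	/* work embedded in its context */
		int apicid;
		int cpu;
	};

	/* New-style handler: no void * cookie */
	static void handler(struct work_struct *work)
	{
		struct warm_boot_info *info =
			container_of(work, struct warm_boot_info, task);
		printf("cpu %d, apicid %d\n", info->cpu, info->apicid);
	}

	int main(void)
	{
		struct warm_boot_info info = { .apicid = 1, .cpu = 1 };
		handler(&info.task);	/* what the workqueue ends up doing */
		return 0;
	}
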
@@ -1064,7 +1086,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
 	struct warm_boot_cpu_info info;
-	struct work_struct task;
 	int apicid, ret;
 	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
 
@@ -1089,15 +1110,15 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
 	info.complete = &done;
 	info.apicid = apicid;
 	info.cpu = cpu;
-	INIT_WORK(&task, do_warm_boot_cpu, &info);
+	INIT_WORK(&info.task, do_warm_boot_cpu);
 
 	tsc_sync_disabled = 1;
 
 	/* init low mem mapping */
 	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
-			KERNEL_PGD_PTRS);
+			min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
 	flush_tlb_all();
-	schedule_work(&task);
+	schedule_work(&info.task);
 	wait_for_completion(&done);
 
 	tsc_sync_disabled = 0;
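
Note: the INIT_WORK change tracks the same API shift -- the old three-argument
form passed an untyped cookie, the new two-argument form relies on embedding:

	/* Old API (pre-2.6.20): handler is void (*)(void *)             */
	INIT_WORK(&task, do_warm_boot_cpu, &info);
	/* New API: handler is void (*)(struct work_struct *)            */
	INIT_WORK(&info.task, do_warm_boot_cpu);

Since info lives on __smp_prepare_cpu()'s stack, schedule_work() alone would
not keep it valid; the wait_for_completion(&done) after the queueing is what
guarantees info outlives do_warm_boot_cpu(). The min_t() cap on
clone_pgd_range() keeps the low-memory identity copy from writing past the
user half of swapper_pg_dir when KERNEL_PGD_PTRS exceeds USER_PGD_PTRS.
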
@@ -1108,34 +1129,15 @@ exit:
 }
 #endif
 
-static void smp_tune_scheduling (void)
+static void smp_tune_scheduling(void)
 {
 	unsigned long cachesize;       /* kB */
-	unsigned long bandwidth = 350; /* MB/s */
-	/*
-	 * Rough estimation for SMP scheduling, this is the number of
-	 * cycles it takes for a fully memory-limited process to flush
-	 * the SMP-local cache.
-	 *
-	 * (For a P5 this pretty much means we will choose another idle
-	 * CPU almost always at wakeup time (this is due to the small
-	 * L1 cache), on PIIs it's around 50-100 usecs, depending on
-	 * the cache size)
-	 */
 
-	if (!cpu_khz) {
-		/*
-		 * this basically disables processor-affinity
-		 * scheduling on SMP without a TSC.
-		 */
-		return;
-	} else {
+	if (cpu_khz) {
 		cachesize = boot_cpu_data.x86_cache_size;
-		if (cachesize == -1) {
-			cachesize = 16; /* Pentiums, 2x8kB cache */
-			bandwidth = 100;
-		}
-		max_cache_size = cachesize * 1024;
+
+		if (cachesize > 0)
+			max_cache_size = cachesize * 1024;
 	}
 }
 
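
Note: with the unused bandwidth heuristic gone, smp_tune_scheduling() now only
sets max_cache_size, and only from reliable inputs: cpu_khz == 0 means there
is no TSC, and boot_cpu_data.x86_cache_size is negative when the CPU does not
report a cache size, so the cachesize > 0 test replaces the old hard-coded
16 kB Pentium fallback without inventing a number.
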
@@ -1430,7 +1432,7 @@ void __cpu_die(unsigned int cpu)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-int __devinit __cpu_up(unsigned int cpu)
+int __cpuinit __cpu_up(unsigned int cpu)
 {
 #ifdef CONFIG_HOTPLUG_CPU
 	int ret=0;
@@ -1461,6 +1463,12 @@ int __devinit __cpu_up(unsigned int cpu)
 	cpu_set(cpu, smp_commenced_mask);
 	while (!cpu_isset(cpu, cpu_online_map))
 		cpu_relax();
+
+#ifdef CONFIG_X86_GENERICARCH
+	if (num_online_cpus() > 8 && genapic == &apic_default)
+		panic("Default flat APIC routing can't be used with > 8 cpus\n");
+#endif
+
 	return 0;
 }
 
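
Note: the new check in __cpu_up() guards a hardware limit rather than a policy
choice: in the default flat delivery mode each CPU owns one bit of the APIC's
8-bit logical destination register, so at most 8 CPUs are addressable, and
larger systems must select a different genapic (on i386, the bigsmp
subarchitecture). A sketch of the constraint, assuming the conventional
one-bit-per-CPU flat encoding:

	/* Hypothetical helper: flat logical APIC ID for a CPU.
	 * Only CPUs 0..7 fit in the 8-bit destination bitmap. */
	static unsigned char flat_logical_id(int cpu)
	{
		return (unsigned char)(1u << cpu);	/* invalid for cpu > 7 */
	}
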