aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/x86_64/kernel/nmi.c6
-rw-r--r--arch/x86_64/kernel/smp.c25
-rw-r--r--arch/x86_64/kernel/smpboot.c688
-rw-r--r--arch/x86_64/kernel/time.c9
4 files changed, 363 insertions, 365 deletions
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 6ec5217f4b73..72f40ca4f06b 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -130,12 +130,6 @@ int __init check_nmi_watchdog (void)
130 mdelay((10*1000)/nmi_hz); // wait 10 ticks 130 mdelay((10*1000)/nmi_hz); // wait 10 ticks
131 131
132 for (cpu = 0; cpu < NR_CPUS; cpu++) { 132 for (cpu = 0; cpu < NR_CPUS; cpu++) {
133#ifdef CONFIG_SMP
134 /* Check cpu_callin_map here because that is set
135 after the timer is started. */
136 if (!cpu_isset(cpu, cpu_callin_map))
137 continue;
138#endif
139 if (cpu_pda[cpu].__nmi_count - counts[cpu] <= 5) { 133 if (cpu_pda[cpu].__nmi_count - counts[cpu] <= 5) {
140 printk("CPU#%d: NMI appears to be stuck (%d)!\n", 134 printk("CPU#%d: NMI appears to be stuck (%d)!\n",
141 cpu, 135 cpu,
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index e3cdbf9a88bd..1e379ed17b1d 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -27,6 +27,7 @@
27#include <asm/mach_apic.h> 27#include <asm/mach_apic.h>
28#include <asm/mmu_context.h> 28#include <asm/mmu_context.h>
29#include <asm/proto.h> 29#include <asm/proto.h>
30#include <asm/apicdef.h>
30 31
31/* 32/*
32 * Smarter SMP flushing macros. 33 * Smarter SMP flushing macros.
@@ -413,3 +414,27 @@ asmlinkage void smp_call_function_interrupt(void)
413 atomic_inc(&call_data->finished); 414 atomic_inc(&call_data->finished);
414 } 415 }
415} 416}
417
418int safe_smp_processor_id(void)
419{
420 int apicid, i;
421
422 if (disable_apic)
423 return 0;
424
425 apicid = hard_smp_processor_id();
426 if (x86_cpu_to_apicid[apicid] == apicid)
427 return apicid;
428
429 for (i = 0; i < NR_CPUS; ++i) {
430 if (x86_cpu_to_apicid[i] == apicid)
431 return i;
432 }
433
434 /* No entries in x86_cpu_to_apicid? Either no MPS|ACPI,
435 * or called too early. Either way, we must be CPU 0. */
436 if (x86_cpu_to_apicid[0] == BAD_APICID)
437 return 0;
438
439 return 0; /* Should not happen */
440}
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 7e2de8925b0f..eca01e82d3c7 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -12,8 +12,7 @@
12 * Pentium Pro and Pentium-II/Xeon MP machines. 12 * Pentium Pro and Pentium-II/Xeon MP machines.
13 * Original development of Linux SMP code supported by Caldera. 13 * Original development of Linux SMP code supported by Caldera.
14 * 14 *
15 * This code is released under the GNU General Public License version 2 or 15 * This code is released under the GNU General Public License version 2
16 * later.
17 * 16 *
18 * Fixes 17 * Fixes
19 * Felix Koop : NR_CPUS used properly 18 * Felix Koop : NR_CPUS used properly
@@ -31,9 +30,13 @@
31 * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. 30 * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
32 * Maciej W. Rozycki : Bits for genuine 82489DX APICs 31 * Maciej W. Rozycki : Bits for genuine 82489DX APICs
33 * Andi Kleen : Changed for SMP boot into long mode. 32 * Andi Kleen : Changed for SMP boot into long mode.
34 * Rusty Russell : Hacked into shape for new "hotplug" boot process. 33 * Rusty Russell : Hacked into shape for new "hotplug" boot process.
34 * Andi Kleen : Converted to new state machine.
35 * Various cleanups.
36 * Probably mostly hotplug CPU ready now.
35 */ 37 */
36 38
39
37#include <linux/config.h> 40#include <linux/config.h>
38#include <linux/init.h> 41#include <linux/init.h>
39 42
@@ -54,11 +57,15 @@
54#include <asm/tlbflush.h> 57#include <asm/tlbflush.h>
55#include <asm/proto.h> 58#include <asm/proto.h>
56 59
60/* Change for real CPU hotplug. Note other files need to be fixed
61 first too. */
62#define __cpuinit __init
63#define __cpuinitdata __initdata
64
57/* Number of siblings per CPU package */ 65/* Number of siblings per CPU package */
58int smp_num_siblings = 1; 66int smp_num_siblings = 1;
59/* Package ID of each logical CPU */ 67/* Package ID of each logical CPU */
60u8 phys_proc_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; 68u8 phys_proc_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
61/* Core ID of each logical CPU */
62u8 cpu_core_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; 69u8 cpu_core_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
63EXPORT_SYMBOL(phys_proc_id); 70EXPORT_SYMBOL(phys_proc_id);
64EXPORT_SYMBOL(cpu_core_id); 71EXPORT_SYMBOL(cpu_core_id);
@@ -66,13 +73,24 @@ EXPORT_SYMBOL(cpu_core_id);
66/* Bitmask of currently online CPUs */ 73/* Bitmask of currently online CPUs */
67cpumask_t cpu_online_map; 74cpumask_t cpu_online_map;
68 75
76EXPORT_SYMBOL(cpu_online_map);
77
78/*
79 * Private maps to synchronize booting between AP and BP.
80 * Probably not needed anymore, but it makes for easier debugging. -AK
81 */
69cpumask_t cpu_callin_map; 82cpumask_t cpu_callin_map;
70cpumask_t cpu_callout_map; 83cpumask_t cpu_callout_map;
71static cpumask_t smp_commenced_mask; 84
85cpumask_t cpu_possible_map;
86EXPORT_SYMBOL(cpu_possible_map);
72 87
73/* Per CPU bogomips and other parameters */ 88/* Per CPU bogomips and other parameters */
74struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; 89struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
75 90
91/* Set when the idlers are all forked */
92int smp_threads_ready;
93
76cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; 94cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
77cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; 95cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
78 96
@@ -80,8 +98,8 @@ cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
80 * Trampoline 80x86 program as an array. 98 * Trampoline 80x86 program as an array.
81 */ 99 */
82 100
83extern unsigned char trampoline_data []; 101extern unsigned char trampoline_data[];
84extern unsigned char trampoline_end []; 102extern unsigned char trampoline_end[];
85 103
86/* 104/*
87 * Currently trivial. Write the real->protected mode 105 * Currently trivial. Write the real->protected mode
@@ -89,7 +107,7 @@ extern unsigned char trampoline_end [];
89 * has made sure it's suitably aligned. 107 * has made sure it's suitably aligned.
90 */ 108 */
91 109
92static unsigned long __init setup_trampoline(void) 110static unsigned long __cpuinit setup_trampoline(void)
93{ 111{
94 void *tramp = __va(SMP_TRAMPOLINE_BASE); 112 void *tramp = __va(SMP_TRAMPOLINE_BASE);
95 memcpy(tramp, trampoline_data, trampoline_end - trampoline_data); 113 memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
@@ -101,7 +119,7 @@ static unsigned long __init setup_trampoline(void)
101 * a given CPU 119 * a given CPU
102 */ 120 */
103 121
104static void __init smp_store_cpu_info(int id) 122static void __cpuinit smp_store_cpu_info(int id)
105{ 123{
106 struct cpuinfo_x86 *c = cpu_data + id; 124 struct cpuinfo_x86 *c = cpu_data + id;
107 125
@@ -110,145 +128,101 @@ static void __init smp_store_cpu_info(int id)
110} 128}
111 129
112/* 130/*
113 * TSC synchronization. 131 * Synchronize TSCs of CPUs
114 * 132 *
115 * We first check whether all CPUs have their TSC's synchronized, 133 * This new algorithm is less accurate than the old "zero TSCs"
116 * then we print a warning if not, and always resync. 134 * one, but we cannot zero TSCs anymore in the new hotplug CPU
135 * model.
117 */ 136 */
118 137
119static atomic_t tsc_start_flag = ATOMIC_INIT(0); 138static atomic_t __cpuinitdata tsc_flag;
120static atomic_t tsc_count_start = ATOMIC_INIT(0); 139static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock);
121static atomic_t tsc_count_stop = ATOMIC_INIT(0); 140static unsigned long long __cpuinitdata bp_tsc, ap_tsc;
122static unsigned long long tsc_values[NR_CPUS];
123 141
124#define NR_LOOPS 5 142#define NR_LOOPS 5
125 143
126extern unsigned int fast_gettimeoffset_quotient; 144static void __cpuinit sync_tsc_bp_init(int init)
127
128static void __init synchronize_tsc_bp (void)
129{ 145{
130 int i; 146 if (init)
131 unsigned long long t0; 147 _raw_spin_lock(&tsc_sync_lock);
132 unsigned long long sum, avg; 148 else
133 long long delta; 149 _raw_spin_unlock(&tsc_sync_lock);
134 long one_usec; 150 atomic_set(&tsc_flag, 0);
135 int buggy = 0; 151}
136
137 printk(KERN_INFO "checking TSC synchronization across %u CPUs: ",num_booting_cpus());
138
139 one_usec = cpu_khz;
140
141 atomic_set(&tsc_start_flag, 1);
142 wmb();
143
144 /*
145 * We loop a few times to get a primed instruction cache,
146 * then the last pass is more or less synchronized and
147 * the BP and APs set their cycle counters to zero all at
148 * once. This reduces the chance of having random offsets
149 * between the processors, and guarantees that the maximum
150 * delay between the cycle counters is never bigger than
151 * the latency of information-passing (cachelines) between
152 * two CPUs.
153 */
154 for (i = 0; i < NR_LOOPS; i++) {
155 /*
156 * all APs synchronize but they loop on '== num_cpus'
157 */
158 while (atomic_read(&tsc_count_start) != num_booting_cpus()-1) mb();
159 atomic_set(&tsc_count_stop, 0);
160 wmb();
161 /*
162 * this lets the APs save their current TSC:
163 */
164 atomic_inc(&tsc_count_start);
165
166 sync_core();
167 rdtscll(tsc_values[smp_processor_id()]);
168 /*
169 * We clear the TSC in the last loop:
170 */
171 if (i == NR_LOOPS-1)
172 write_tsc(0, 0);
173
174 /*
175 * Wait for all APs to leave the synchronization point:
176 */
177 while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1) mb();
178 atomic_set(&tsc_count_start, 0);
179 wmb();
180 atomic_inc(&tsc_count_stop);
181 }
182
183 sum = 0;
184 for (i = 0; i < NR_CPUS; i++) {
185 if (cpu_isset(i, cpu_callout_map)) {
186 t0 = tsc_values[i];
187 sum += t0;
188 }
189 }
190 avg = sum / num_booting_cpus();
191
192 sum = 0;
193 for (i = 0; i < NR_CPUS; i++) {
194 if (!cpu_isset(i, cpu_callout_map))
195 continue;
196
197 delta = tsc_values[i] - avg;
198 if (delta < 0)
199 delta = -delta;
200 /*
201 * We report bigger than 2 microseconds clock differences.
202 */
203 if (delta > 2*one_usec) {
204 long realdelta;
205 if (!buggy) {
206 buggy = 1;
207 printk("\n");
208 }
209 realdelta = delta / one_usec;
210 if (tsc_values[i] < avg)
211 realdelta = -realdelta;
212
213 printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
214 i, realdelta);
215 }
216 152
217 sum += delta; 153/*
218 } 154 * Synchronize TSC on AP with BP.
219 if (!buggy) 155 */
220 printk("passed.\n"); 156static void __cpuinit __sync_tsc_ap(void)
157{
158 if (!cpu_has_tsc)
159 return;
160 Dprintk("AP %d syncing TSC\n", smp_processor_id());
161
162 while (atomic_read(&tsc_flag) != 0)
163 cpu_relax();
164 atomic_inc(&tsc_flag);
165 mb();
166 _raw_spin_lock(&tsc_sync_lock);
167 wrmsrl(MSR_IA32_TSC, bp_tsc);
168 _raw_spin_unlock(&tsc_sync_lock);
169 rdtscll(ap_tsc);
170 mb();
171 atomic_inc(&tsc_flag);
172 mb();
221} 173}
222 174
223static void __init synchronize_tsc_ap (void) 175static void __cpuinit sync_tsc_ap(void)
224{ 176{
225 int i; 177 int i;
178 for (i = 0; i < NR_LOOPS; i++)
179 __sync_tsc_ap();
180}
226 181
227 /* 182/*
228 * Not every cpu is online at the time 183 * Synchronize TSC from BP to AP.
229 * this gets called, so we first wait for the BP to 184 */
230 * finish SMP initialization: 185static void __cpuinit __sync_tsc_bp(int cpu)
231 */ 186{
232 while (!atomic_read(&tsc_start_flag)) mb(); 187 if (!cpu_has_tsc)
233 188 return;
234 for (i = 0; i < NR_LOOPS; i++) {
235 atomic_inc(&tsc_count_start);
236 while (atomic_read(&tsc_count_start) != num_booting_cpus()) mb();
237 189
238 sync_core(); 190 /* Wait for AP */
239 rdtscll(tsc_values[smp_processor_id()]); 191 while (atomic_read(&tsc_flag) == 0)
240 if (i == NR_LOOPS-1) 192 cpu_relax();
241 write_tsc(0, 0); 193 /* Save BPs TSC */
194 sync_core();
195 rdtscll(bp_tsc);
196 /* Don't do the sync core here to avoid too much latency. */
197 mb();
198 /* Start the AP */
199 _raw_spin_unlock(&tsc_sync_lock);
200 /* Wait for AP again */
201 while (atomic_read(&tsc_flag) < 2)
202 cpu_relax();
203 rdtscl(bp_tsc);
204 barrier();
205}
242 206
243 atomic_inc(&tsc_count_stop); 207static void __cpuinit sync_tsc_bp(int cpu)
244 while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb(); 208{
209 int i;
210 for (i = 0; i < NR_LOOPS - 1; i++) {
211 __sync_tsc_bp(cpu);
212 sync_tsc_bp_init(1);
245 } 213 }
214 __sync_tsc_bp(cpu);
215 printk(KERN_INFO "Synced TSC of CPU %d difference %Ld\n",
216 cpu, ap_tsc - bp_tsc);
246} 217}
247#undef NR_LOOPS
248 218
249static atomic_t init_deasserted; 219static atomic_t init_deasserted __cpuinitdata;
250 220
251static void __init smp_callin(void) 221/*
222 * Report back to the Boot Processor.
223 * Running on AP.
224 */
225void __cpuinit smp_callin(void)
252{ 226{
253 int cpuid, phys_id; 227 int cpuid, phys_id;
254 unsigned long timeout; 228 unsigned long timeout;
@@ -259,7 +233,8 @@ static void __init smp_callin(void)
259 * our local APIC. We have to wait for the IPI or we'll 233 * our local APIC. We have to wait for the IPI or we'll
260 * lock up on an APIC access. 234 * lock up on an APIC access.
261 */ 235 */
262 while (!atomic_read(&init_deasserted)); 236 while (!atomic_read(&init_deasserted))
237 cpu_relax();
263 238
264 /* 239 /*
265 * (This works even if the APIC is not enabled.) 240 * (This works even if the APIC is not enabled.)
@@ -290,7 +265,7 @@ static void __init smp_callin(void)
290 */ 265 */
291 if (cpu_isset(cpuid, cpu_callout_map)) 266 if (cpu_isset(cpuid, cpu_callout_map))
292 break; 267 break;
293 rep_nop(); 268 cpu_relax();
294 } 269 }
295 270
296 if (!time_before(jiffies, timeout)) { 271 if (!time_before(jiffies, timeout)) {
@@ -325,20 +300,12 @@ static void __init smp_callin(void)
325 * Allow the master to continue. 300 * Allow the master to continue.
326 */ 301 */
327 cpu_set(cpuid, cpu_callin_map); 302 cpu_set(cpuid, cpu_callin_map);
328
329 /*
330 * Synchronize the TSC with the BP
331 */
332 if (cpu_has_tsc)
333 synchronize_tsc_ap();
334} 303}
335 304
336static int cpucount;
337
338/* 305/*
339 * Activate a secondary processor. 306 * Setup code on secondary processor (after comming out of the trampoline)
340 */ 307 */
341void __init start_secondary(void) 308void __cpuinit start_secondary(void)
342{ 309{
343 /* 310 /*
344 * Dont put anything before smp_callin(), SMP 311 * Dont put anything before smp_callin(), SMP
@@ -348,17 +315,18 @@ void __init start_secondary(void)
348 cpu_init(); 315 cpu_init();
349 smp_callin(); 316 smp_callin();
350 317
318 /*
319 * Synchronize the TSC with the BP
320 */
321 sync_tsc_ap();
322
351 /* otherwise gcc will move up the smp_processor_id before the cpu_init */ 323 /* otherwise gcc will move up the smp_processor_id before the cpu_init */
352 barrier(); 324 barrier();
353 325
354 Dprintk("cpu %d: waiting for commence\n", smp_processor_id());
355 while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
356 rep_nop();
357
358 Dprintk("cpu %d: setting up apic clock\n", smp_processor_id()); 326 Dprintk("cpu %d: setting up apic clock\n", smp_processor_id());
359 setup_secondary_APIC_clock(); 327 setup_secondary_APIC_clock();
360 328
361 Dprintk("cpu %d: enabling apic timer\n", smp_processor_id()); 329 Dprintk("cpu %d: enabling apic timer\n", smp_processor_id());
362 330
363 if (nmi_watchdog == NMI_IO_APIC) { 331 if (nmi_watchdog == NMI_IO_APIC) {
364 disable_8259A_irq(0); 332 disable_8259A_irq(0);
@@ -367,26 +335,22 @@ void __init start_secondary(void)
367 } 335 }
368 336
369 337
370 enable_APIC_timer(); 338 enable_APIC_timer();
371 339
372 /* 340 /*
373 * low-memory mappings have been cleared, flush them from 341 * Allow the master to continue.
374 * the local TLBs too.
375 */ 342 */
376 local_flush_tlb();
377
378 Dprintk("cpu %d eSetting cpu_online_map\n", smp_processor_id());
379 cpu_set(smp_processor_id(), cpu_online_map); 343 cpu_set(smp_processor_id(), cpu_online_map);
380 wmb(); 344 mb();
381 345
382 cpu_idle(); 346 cpu_idle();
383} 347}
384 348
385extern volatile unsigned long init_rsp; 349extern volatile unsigned long init_rsp;
386extern void (*initial_code)(void); 350extern void (*initial_code)(void);
387 351
388#if APIC_DEBUG 352#if APIC_DEBUG
389static inline void inquire_remote_apic(int apicid) 353static void inquire_remote_apic(int apicid)
390{ 354{
391 unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; 355 unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
392 char *names[] = { "ID", "VERSION", "SPIV" }; 356 char *names[] = { "ID", "VERSION", "SPIV" };
@@ -423,7 +387,10 @@ static inline void inquire_remote_apic(int apicid)
423} 387}
424#endif 388#endif
425 389
426static int __init wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip) 390/*
391 * Kick the secondary to wake up.
392 */
393static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
427{ 394{
428 unsigned long send_status = 0, accept_status = 0; 395 unsigned long send_status = 0, accept_status = 0;
429 int maxlvt, timeout, num_starts, j; 396 int maxlvt, timeout, num_starts, j;
@@ -546,33 +513,36 @@ static int __init wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_
546 return (send_status | accept_status); 513 return (send_status | accept_status);
547} 514}
548 515
549static void __init do_boot_cpu (int apicid) 516/*
517 * Boot one CPU.
518 */
519static int __cpuinit do_boot_cpu(int cpu, int apicid)
550{ 520{
551 struct task_struct *idle; 521 struct task_struct *idle;
552 unsigned long boot_error; 522 unsigned long boot_error;
553 int timeout, cpu; 523 int timeout;
554 unsigned long start_rip; 524 unsigned long start_rip;
555
556 cpu = ++cpucount;
557 /* 525 /*
558 * We can't use kernel_thread since we must avoid to 526 * We can't use kernel_thread since we must avoid to
559 * reschedule the child. 527 * reschedule the child.
560 */ 528 */
561 idle = fork_idle(cpu); 529 idle = fork_idle(cpu);
562 if (IS_ERR(idle)) 530 if (IS_ERR(idle)) {
563 panic("failed fork for CPU %d", cpu); 531 printk("failed fork for CPU %d\n", cpu);
532 return PTR_ERR(idle);
533 }
564 x86_cpu_to_apicid[cpu] = apicid; 534 x86_cpu_to_apicid[cpu] = apicid;
565 535
566 cpu_pda[cpu].pcurrent = idle; 536 cpu_pda[cpu].pcurrent = idle;
567 537
568 start_rip = setup_trampoline(); 538 start_rip = setup_trampoline();
569 539
570 init_rsp = idle->thread.rsp; 540 init_rsp = idle->thread.rsp;
571 per_cpu(init_tss,cpu).rsp0 = init_rsp; 541 per_cpu(init_tss,cpu).rsp0 = init_rsp;
572 initial_code = start_secondary; 542 initial_code = start_secondary;
573 clear_ti_thread_flag(idle->thread_info, TIF_FORK); 543 clear_ti_thread_flag(idle->thread_info, TIF_FORK);
574 544
575 printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu, apicid, 545 printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu, apicid,
576 start_rip, init_rsp); 546 start_rip, init_rsp);
577 547
578 /* 548 /*
@@ -609,7 +579,7 @@ static void __init do_boot_cpu (int apicid)
609 /* 579 /*
610 * Starting actual IPI sequence... 580 * Starting actual IPI sequence...
611 */ 581 */
612 boot_error = wakeup_secondary_via_INIT(apicid, start_rip); 582 boot_error = wakeup_secondary_via_INIT(apicid, start_rip);
613 583
614 if (!boot_error) { 584 if (!boot_error) {
615 /* 585 /*
@@ -650,58 +620,131 @@ static void __init do_boot_cpu (int apicid)
650 if (boot_error) { 620 if (boot_error) {
651 cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ 621 cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
652 clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */ 622 clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
653 cpucount--; 623 cpu_clear(cpu, cpu_present_map);
624 cpu_clear(cpu, cpu_possible_map);
654 x86_cpu_to_apicid[cpu] = BAD_APICID; 625 x86_cpu_to_apicid[cpu] = BAD_APICID;
655 x86_cpu_to_log_apicid[cpu] = BAD_APICID; 626 x86_cpu_to_log_apicid[cpu] = BAD_APICID;
627 return -EIO;
656 } 628 }
629
630 return 0;
657} 631}
658 632
659static void smp_tune_scheduling (void) 633cycles_t cacheflush_time;
634unsigned long cache_decay_ticks;
635
636/*
637 * Construct cpu_sibling_map[], so that we can tell the sibling CPU
638 * on SMT systems efficiently.
639 */
640static __cpuinit void detect_siblings(void)
660{ 641{
661 int cachesize; /* kB */ 642 int cpu;
662 unsigned long bandwidth = 1000; /* MB/s */
663 /*
664 * Rough estimation for SMP scheduling, this is the number of
665 * cycles it takes for a fully memory-limited process to flush
666 * the SMP-local cache.
667 *
668 * (For a P5 this pretty much means we will choose another idle
669 * CPU almost always at wakeup time (this is due to the small
670 * L1 cache), on PIIs it's around 50-100 usecs, depending on
671 * the cache size)
672 */
673 643
674 if (!cpu_khz) { 644 for (cpu = 0; cpu < NR_CPUS; cpu++) {
675 return; 645 cpus_clear(cpu_sibling_map[cpu]);
676 } else { 646 cpus_clear(cpu_core_map[cpu]);
677 cachesize = boot_cpu_data.x86_cache_size; 647 }
678 if (cachesize == -1) { 648
679 cachesize = 16; /* Pentiums, 2x8kB cache */ 649 for_each_online_cpu (cpu) {
680 bandwidth = 100; 650 struct cpuinfo_x86 *c = cpu_data + cpu;
651 int siblings = 0;
652 int i;
653 if (smp_num_siblings > 1) {
654 for_each_online_cpu (i) {
655 if (cpu_core_id[cpu] == phys_proc_id[i]) {
656 siblings++;
657 cpu_set(i, cpu_sibling_map[cpu]);
658 }
659 }
660 } else {
661 siblings++;
662 cpu_set(cpu, cpu_sibling_map[cpu]);
663 }
664
665 if (siblings != smp_num_siblings) {
666 printk(KERN_WARNING
667 "WARNING: %d siblings found for CPU%d, should be %d\n",
668 siblings, cpu, smp_num_siblings);
669 smp_num_siblings = siblings;
681 } 670 }
671 if (c->x86_num_cores > 1) {
672 for_each_online_cpu(i) {
673 if (phys_proc_id[cpu] == phys_proc_id[i])
674 cpu_set(i, cpu_core_map[cpu]);
675 }
676 } else
677 cpu_core_map[cpu] = cpu_sibling_map[cpu];
682 } 678 }
683} 679}
684 680
685/* 681/*
686 * Cycle through the processors sending APIC IPIs to boot each. 682 * Cleanup possible dangling ends...
687 */ 683 */
688 684static __cpuinit void smp_cleanup_boot(void)
689static void __init smp_boot_cpus(unsigned int max_cpus)
690{ 685{
691 unsigned apicid, cpu, bit, kicked; 686 /*
687 * Paranoid: Set warm reset code and vector here back
688 * to default values.
689 */
690 CMOS_WRITE(0, 0xf);
692 691
693 nmi_watchdog_default(); 692 /*
693 * Reset trampoline flag
694 */
695 *((volatile int *) phys_to_virt(0x467)) = 0;
694 696
697#ifndef CONFIG_HOTPLUG_CPU
695 /* 698 /*
696 * Setup boot CPU information 699 * Free pages reserved for SMP bootup.
700 * When you add hotplug CPU support later remove this
701 * Note there is more work to be done for later CPU bootup.
697 */ 702 */
698 smp_store_cpu_info(0); /* Final full version of the data */
699 printk(KERN_INFO "CPU%d: ", 0);
700 print_cpu_info(&cpu_data[0]);
701 703
702 current_thread_info()->cpu = 0; 704 free_page((unsigned long) __va(PAGE_SIZE));
703 smp_tune_scheduling(); 705 free_page((unsigned long) __va(SMP_TRAMPOLINE_BASE));
706#endif
707}
708
709/*
710 * Fall back to non SMP mode after errors.
711 *
712 * RED-PEN audit/test this more. I bet there is more state messed up here.
713 */
714static __cpuinit void disable_smp(void)
715{
716 cpu_present_map = cpumask_of_cpu(0);
717 cpu_possible_map = cpumask_of_cpu(0);
718 if (smp_found_config)
719 phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
720 else
721 phys_cpu_present_map = physid_mask_of_physid(0);
722 cpu_set(0, cpu_sibling_map[0]);
723 cpu_set(0, cpu_core_map[0]);
724}
725
726/*
727 * Handle user cpus=... parameter.
728 */
729static __cpuinit void enforce_max_cpus(unsigned max_cpus)
730{
731 int i, k;
732 k = 0;
733 for (i = 0; i < NR_CPUS; i++) {
734 if (!cpu_possible(i))
735 continue;
736 if (++k > max_cpus) {
737 cpu_clear(i, cpu_possible_map);
738 cpu_clear(i, cpu_present_map);
739 }
740 }
741}
704 742
743/*
744 * Various sanity checks.
745 */
746static int __cpuinit smp_sanity_check(unsigned max_cpus)
747{
705 if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { 748 if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
706 printk("weird, boot CPU (#%d) not listed by the BIOS.\n", 749 printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
707 hard_smp_processor_id()); 750 hard_smp_processor_id());
@@ -714,15 +757,11 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
714 */ 757 */
715 if (!smp_found_config) { 758 if (!smp_found_config) {
716 printk(KERN_NOTICE "SMP motherboard not detected.\n"); 759 printk(KERN_NOTICE "SMP motherboard not detected.\n");
717 io_apic_irqs = 0; 760 disable_smp();
718 cpu_online_map = cpumask_of_cpu(0);
719 cpu_set(0, cpu_sibling_map[0]);
720 cpu_set(0, cpu_core_map[0]);
721 phys_cpu_present_map = physid_mask_of_physid(0);
722 if (APIC_init_uniprocessor()) 761 if (APIC_init_uniprocessor())
723 printk(KERN_NOTICE "Local APIC not detected." 762 printk(KERN_NOTICE "Local APIC not detected."
724 " Using dummy APIC emulation.\n"); 763 " Using dummy APIC emulation.\n");
725 goto smp_done; 764 return -1;
726 } 765 }
727 766
728 /* 767 /*
@@ -742,213 +781,146 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
742 printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", 781 printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
743 boot_cpu_id); 782 boot_cpu_id);
744 printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); 783 printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
745 io_apic_irqs = 0; 784 nr_ioapics = 0;
746 cpu_online_map = cpumask_of_cpu(0); 785 return -1;
747 cpu_set(0, cpu_sibling_map[0]);
748 cpu_set(0, cpu_core_map[0]);
749 phys_cpu_present_map = physid_mask_of_physid(0);
750 disable_apic = 1;
751 goto smp_done;
752 } 786 }
753 787
754 verify_local_APIC();
755
756 /* 788 /*
757 * If SMP should be disabled, then really disable it! 789 * If SMP should be disabled, then really disable it!
758 */ 790 */
759 if (!max_cpus) { 791 if (!max_cpus) {
760 smp_found_config = 0;
761 printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); 792 printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
762 io_apic_irqs = 0; 793 nr_ioapics = 0;
763 cpu_online_map = cpumask_of_cpu(0); 794 return -1;
764 cpu_set(0, cpu_sibling_map[0]);
765 cpu_set(0, cpu_core_map[0]);
766 phys_cpu_present_map = physid_mask_of_physid(0);
767 disable_apic = 1;
768 goto smp_done;
769 } 795 }
770 796
771 connect_bsp_APIC(); 797 return 0;
772 setup_local_APIC(); 798}
773
774 if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id)
775 BUG();
776
777 x86_cpu_to_apicid[0] = boot_cpu_id;
778
779 /*
780 * Now scan the CPU present map and fire up the other CPUs.
781 */
782 Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
783 799
784 kicked = 1; 800/*
785 for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) { 801 * Prepare for SMP bootup. The MP table or ACPI has been read
786 apicid = cpu_present_to_apicid(bit); 802 * earlier. Just do some sanity checking here and enable APIC mode.
787 /* 803 */
788 * Don't even attempt to start the boot CPU! 804void __cpuinit smp_prepare_cpus(unsigned int max_cpus)
789 */ 805{
790 if (apicid == boot_cpu_id || (apicid == BAD_APICID)) 806 int i;
791 continue;
792 807
793 if (!physid_isset(apicid, phys_cpu_present_map)) 808 nmi_watchdog_default();
794 continue; 809 current_cpu_data = boot_cpu_data;
795 if ((max_cpus >= 0) && (max_cpus <= cpucount+1)) 810 current_thread_info()->cpu = 0; /* needed? */
796 continue;
797 811
798 do_boot_cpu(apicid); 812 enforce_max_cpus(max_cpus);
799 ++kicked;
800 }
801 813
802 /* 814 /*
803 * Cleanup possible dangling ends... 815 * Fill in cpu_present_mask
804 */ 816 */
805 { 817 for (i = 0; i < NR_CPUS; i++) {
806 /* 818 int apicid = cpu_present_to_apicid(i);
807 * Install writable page 0 entry to set BIOS data area. 819 if (physid_isset(apicid, phys_cpu_present_map)) {
808 */ 820 cpu_set(i, cpu_present_map);
809 local_flush_tlb(); 821 /* possible map would be different if we supported real
810 822 CPU hotplug. */
811 /* 823 cpu_set(i, cpu_possible_map);
812 * Paranoid: Set warm reset code and vector here back 824 }
813 * to default values.
814 */
815 CMOS_WRITE(0, 0xf);
816
817 *((volatile int *) phys_to_virt(0x467)) = 0;
818 } 825 }
819 826
820 /* 827 if (smp_sanity_check(max_cpus) < 0) {
821 * Allow the user to impress friends. 828 printk(KERN_INFO "SMP disabled\n");
822 */ 829 disable_smp();
823 830 return;
824 Dprintk("Before bogomips.\n");
825 if (!cpucount) {
826 printk(KERN_INFO "Only one processor found.\n");
827 } else {
828 unsigned long bogosum = 0;
829 for (cpu = 0; cpu < NR_CPUS; cpu++)
830 if (cpu_isset(cpu, cpu_callout_map))
831 bogosum += cpu_data[cpu].loops_per_jiffy;
832 printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
833 cpucount+1,
834 bogosum/(500000/HZ),
835 (bogosum/(5000/HZ))%100);
836 Dprintk("Before bogocount - setting activated=1.\n");
837 } 831 }
838 832
833
839 /* 834 /*
840 * Construct cpu_sibling_map[], so that we can tell the 835 * Switch from PIC to APIC mode.
841 * sibling CPU efficiently.
842 */ 836 */
843 for (cpu = 0; cpu < NR_CPUS; cpu++) { 837 connect_bsp_APIC();
844 cpus_clear(cpu_sibling_map[cpu]); 838 setup_local_APIC();
845 cpus_clear(cpu_core_map[cpu]);
846 }
847
848 for (cpu = 0; cpu < NR_CPUS; cpu++) {
849 struct cpuinfo_x86 *c = cpu_data + cpu;
850 int siblings = 0;
851 int i;
852 if (!cpu_isset(cpu, cpu_callout_map))
853 continue;
854
855 if (smp_num_siblings > 1) {
856 for (i = 0; i < NR_CPUS; i++) {
857 if (!cpu_isset(i, cpu_callout_map))
858 continue;
859 if (phys_proc_id[cpu] == cpu_core_id[i]) {
860 siblings++;
861 cpu_set(i, cpu_sibling_map[cpu]);
862 }
863 }
864 } else {
865 siblings++;
866 cpu_set(cpu, cpu_sibling_map[cpu]);
867 }
868 839
869 if (siblings != smp_num_siblings) { 840 if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) {
870 printk(KERN_WARNING 841 panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
871 "WARNING: %d siblings found for CPU%d, should be %d\n", 842 GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
872 siblings, cpu, smp_num_siblings); 843 /* Or can we switch back to PIC here? */
873 smp_num_siblings = siblings;
874 }
875 if (c->x86_num_cores > 1) {
876 for (i = 0; i < NR_CPUS; i++) {
877 if (!cpu_isset(i, cpu_callout_map))
878 continue;
879 if (phys_proc_id[cpu] == phys_proc_id[i]) {
880 cpu_set(i, cpu_core_map[cpu]);
881 }
882 }
883 } else
884 cpu_core_map[cpu] = cpu_sibling_map[cpu];
885 } 844 }
886 845 x86_cpu_to_apicid[0] = boot_cpu_id;
887 Dprintk("Boot done.\n");
888 846
889 /* 847 /*
890 * Here we can be sure that there is an IO-APIC in the system. Let's 848 * Now start the IO-APICs
891 * go and set it up:
892 */ 849 */
893 if (!skip_ioapic_setup && nr_ioapics) 850 if (!skip_ioapic_setup && nr_ioapics)
894 setup_IO_APIC(); 851 setup_IO_APIC();
895 else 852 else
896 nr_ioapics = 0; 853 nr_ioapics = 0;
897 854
898 setup_boot_APIC_clock();
899
900 /* 855 /*
901 * Synchronize the TSC with the AP 856 * Set up local APIC timer on boot CPU.
902 */ 857 */
903 if (cpu_has_tsc && cpucount)
904 synchronize_tsc_bp();
905 858
906 smp_done: 859 setup_boot_APIC_clock();
907 time_init_smp();
908} 860}
909 861
910/* These are wrappers to interface to the new boot process. Someone 862/*
911 who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ 863 * Early setup to make printk work.
912void __init smp_prepare_cpus(unsigned int max_cpus) 864 */
865void __init smp_prepare_boot_cpu(void)
913{ 866{
914 smp_boot_cpus(max_cpus); 867 int me = smp_processor_id();
868 cpu_set(me, cpu_online_map);
869 cpu_set(me, cpu_callout_map);
915} 870}
916 871
917void __devinit smp_prepare_boot_cpu(void) 872/*
873 * Entry point to boot a CPU.
874 *
875 * This is all __cpuinit, not __devinit for now because we don't support
876 * CPU hotplug (yet).
877 */
878int __cpuinit __cpu_up(unsigned int cpu)
918{ 879{
919 cpu_set(smp_processor_id(), cpu_online_map); 880 int err;
920 cpu_set(smp_processor_id(), cpu_callout_map); 881 int apicid = cpu_present_to_apicid(cpu);
921}
922 882
923int __devinit __cpu_up(unsigned int cpu) 883 WARN_ON(irqs_disabled());
924{
925 /* This only works at boot for x86. See "rewrite" above. */
926 if (cpu_isset(cpu, smp_commenced_mask)) {
927 local_irq_enable();
928 return -ENOSYS;
929 }
930 884
931 /* In case one didn't come up */ 885 Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu);
932 if (!cpu_isset(cpu, cpu_callin_map)) { 886
933 local_irq_enable(); 887 if (apicid == BAD_APICID || apicid == boot_cpu_id ||
934 return -EIO; 888 !physid_isset(apicid, phys_cpu_present_map)) {
889 printk("__cpu_up: bad cpu %d\n", cpu);
890 return -EINVAL;
891 }
892 sync_tsc_bp_init(1);
893
894 /* Boot it! */
895 err = do_boot_cpu(cpu, apicid);
896 if (err < 0) {
897 sync_tsc_bp_init(0);
898 Dprintk("do_boot_cpu failed %d\n", err);
899 return err;
935 } 900 }
936 local_irq_enable(); 901
902 sync_tsc_bp(cpu);
937 903
938 /* Unleash the CPU! */ 904 /* Unleash the CPU! */
939 Dprintk("waiting for cpu %d\n", cpu); 905 Dprintk("waiting for cpu %d\n", cpu);
940 906
941 cpu_set(cpu, smp_commenced_mask);
942 while (!cpu_isset(cpu, cpu_online_map)) 907 while (!cpu_isset(cpu, cpu_online_map))
943 mb(); 908 cpu_relax();
944 return 0; 909 return 0;
945} 910}
946 911
947void __init smp_cpus_done(unsigned int max_cpus) 912/*
913 * Finish the SMP boot.
914 */
915void __cpuinit smp_cpus_done(unsigned int max_cpus)
948{ 916{
917 zap_low_mappings();
918 smp_cleanup_boot();
919
949#ifdef CONFIG_X86_IO_APIC 920#ifdef CONFIG_X86_IO_APIC
950 setup_ioapic_dest(); 921 setup_ioapic_dest();
951#endif 922#endif
952 zap_low_mappings();
953}
954 923
924 detect_siblings();
925 time_init_gtod();
926}
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index c7a1b50b4af3..943a9bfba209 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -916,9 +916,16 @@ void __init time_init(void)
916 setup_irq(0, &irq0); 916 setup_irq(0, &irq0);
917 917
918 set_cyc2ns_scale(cpu_khz / 1000); 918 set_cyc2ns_scale(cpu_khz / 1000);
919
920#ifndef CONFIG_SMP
921 time_init_gtod();
922#endif
919} 923}
920 924
921void __init time_init_smp(void) 925/*
926 * Decide after all CPUs are booted what mode gettimeofday should use.
927 */
928void __init time_init_gtod(void)
922{ 929{
923 char *timetype; 930 char *timetype;
924 931