author     Jeff Garzik <jeff@garzik.org>  2007-02-17 15:11:43 -0500
committer  Jeff Garzik <jeff@garzik.org>  2007-02-17 15:11:43 -0500
commit     f630fe2817601314b2eb7ca5ddc23c7834646731 (patch)
tree       3bfb4939b7bbc3859575ca8b58fa3f929b015941 /arch/i386/kernel/smpboot.c
parent     48c871c1f6a7c7044dd76774fb469e65c7e2e4e8 (diff)
parent     8a03d9a498eaf02c8a118752050a5154852c13bf (diff)
Merge branch 'master' into upstream
Diffstat (limited to 'arch/i386/kernel/smpboot.c')
-rw-r--r--  arch/i386/kernel/smpboot.c  203
1 file changed, 28 insertions(+), 175 deletions(-)
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 8c6c8c52b95c..48bfcaa13ecc 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -63,6 +63,7 @@
 #include <mach_apic.h>
 #include <mach_wakecpu.h>
 #include <smpboot_hooks.h>
+#include <asm/vmi.h>
 
 /* Set if we find a B stepping CPU */
 static int __devinitdata smp_b_stepping;
@@ -93,12 +94,6 @@ cpumask_t cpu_possible_map;
 EXPORT_SYMBOL(cpu_possible_map);
 static cpumask_t smp_commenced_mask;
 
-/* TSC's upper 32 bits can't be written on earlier CPUs (before Prescott), so
- * there is no way to resync one AP against the BP. TBD: for Prescott and
- * above, we should use IA64's algorithm
- */
-static int __devinitdata tsc_sync_disabled;
-
 /* Per CPU bogomips and other parameters */
 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
 EXPORT_SYMBOL(cpu_data);
@@ -215,151 +210,6 @@ valid_k7:
         ;
 }
 
-/*
- * TSC synchronization.
- *
- * We first check whether all CPUs have their TSC's synchronized,
- * then we print a warning if not, and always resync.
- */
-
-static struct {
-        atomic_t start_flag;
-        atomic_t count_start;
-        atomic_t count_stop;
-        unsigned long long values[NR_CPUS];
-} tsc __cpuinitdata = {
-        .start_flag = ATOMIC_INIT(0),
-        .count_start = ATOMIC_INIT(0),
-        .count_stop = ATOMIC_INIT(0),
-};
-
-#define NR_LOOPS 5
-
-static void __init synchronize_tsc_bp(void)
-{
-        int i;
-        unsigned long long t0;
-        unsigned long long sum, avg;
-        long long delta;
-        unsigned int one_usec;
-        int buggy = 0;
-
-        printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
-
-        /* convert from kcyc/sec to cyc/usec */
-        one_usec = cpu_khz / 1000;
-
-        atomic_set(&tsc.start_flag, 1);
-        wmb();
-
-        /*
-         * We loop a few times to get a primed instruction cache,
-         * then the last pass is more or less synchronized and
-         * the BP and APs set their cycle counters to zero all at
-         * once. This reduces the chance of having random offsets
-         * between the processors, and guarantees that the maximum
-         * delay between the cycle counters is never bigger than
-         * the latency of information-passing (cachelines) between
-         * two CPUs.
-         */
-        for (i = 0; i < NR_LOOPS; i++) {
-                /*
-                 * all APs synchronize but they loop on '== num_cpus'
-                 */
-                while (atomic_read(&tsc.count_start) != num_booting_cpus()-1)
-                        cpu_relax();
-                atomic_set(&tsc.count_stop, 0);
-                wmb();
-                /*
-                 * this lets the APs save their current TSC:
-                 */
-                atomic_inc(&tsc.count_start);
-
-                rdtscll(tsc.values[smp_processor_id()]);
-                /*
-                 * We clear the TSC in the last loop:
-                 */
-                if (i == NR_LOOPS-1)
-                        write_tsc(0, 0);
-
-                /*
-                 * Wait for all APs to leave the synchronization point:
-                 */
-                while (atomic_read(&tsc.count_stop) != num_booting_cpus()-1)
-                        cpu_relax();
-                atomic_set(&tsc.count_start, 0);
-                wmb();
-                atomic_inc(&tsc.count_stop);
-        }
-
-        sum = 0;
-        for (i = 0; i < NR_CPUS; i++) {
-                if (cpu_isset(i, cpu_callout_map)) {
-                        t0 = tsc.values[i];
-                        sum += t0;
-                }
-        }
-        avg = sum;
-        do_div(avg, num_booting_cpus());
-
-        for (i = 0; i < NR_CPUS; i++) {
-                if (!cpu_isset(i, cpu_callout_map))
-                        continue;
-                delta = tsc.values[i] - avg;
-                if (delta < 0)
-                        delta = -delta;
-                /*
-                 * We report bigger than 2 microseconds clock differences.
-                 */
-                if (delta > 2*one_usec) {
-                        long long realdelta;
-
-                        if (!buggy) {
-                                buggy = 1;
-                                printk("\n");
-                        }
-                        realdelta = delta;
-                        do_div(realdelta, one_usec);
-                        if (tsc.values[i] < avg)
-                                realdelta = -realdelta;
-
-                        if (realdelta)
-                                printk(KERN_INFO "CPU#%d had %Ld usecs TSC "
-                                        "skew, fixed it up.\n", i, realdelta);
-                }
-        }
-        if (!buggy)
-                printk("passed.\n");
-}
-
-static void __cpuinit synchronize_tsc_ap(void)
-{
-        int i;
-
-        /*
-         * Not every cpu is online at the time
-         * this gets called, so we first wait for the BP to
-         * finish SMP initialization:
-         */
-        while (!atomic_read(&tsc.start_flag))
-                cpu_relax();
-
-        for (i = 0; i < NR_LOOPS; i++) {
-                atomic_inc(&tsc.count_start);
-                while (atomic_read(&tsc.count_start) != num_booting_cpus())
-                        cpu_relax();
-
-                rdtscll(tsc.values[smp_processor_id()]);
-                if (i == NR_LOOPS-1)
-                        write_tsc(0, 0);
-
-                atomic_inc(&tsc.count_stop);
-                while (atomic_read(&tsc.count_stop) != num_booting_cpus())
-                        cpu_relax();
-        }
-}
-#undef NR_LOOPS
-
 extern void calibrate_delay(void);
 
 static atomic_t init_deasserted;
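The block removed above implemented a boot-time resync: all CPUs rendezvous on atomic counters, sample their TSC on the last pass, and the BP zeroes everyone's counter and reports any residual skew. Below is a minimal user-space model of that counter rendezvous, using C11 atomics and threads; it does a single symmetric pass, and the names are illustrative rather than kernel API.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <threads.h>
#include <time.h>

#define NCPUS 4

static atomic_int count_start, count_stop;
static unsigned long long values[NCPUS];

/* stand-in for rdtscll(): a monotonic nanosecond timestamp */
static unsigned long long sample_counter(void)
{
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (unsigned long long)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

static int cpu_thread(void *arg)
{
        int id = (int)(intptr_t)arg;

        /* first barrier: nobody samples until everyone has arrived */
        atomic_fetch_add(&count_start, 1);
        while (atomic_load(&count_start) != NCPUS)
                ;
        /* all participants sample as close to simultaneously as possible */
        values[id] = sample_counter();
        /* second barrier: nobody leaves until everyone has sampled */
        atomic_fetch_add(&count_stop, 1);
        while (atomic_load(&count_stop) != NCPUS)
                ;
        return 0;
}

int main(void)
{
        thrd_t t[NCPUS];
        int i;

        for (i = 0; i < NCPUS; i++)
                thrd_create(&t[i], cpu_thread, (void *)(intptr_t)i);
        for (i = 0; i < NCPUS; i++)
                thrd_join(t[i], NULL);
        for (i = 0; i < NCPUS; i++)
                printf("cpu%d: %llu\n", i, values[i]);
        return 0;
}

The kernel version repeated this handshake NR_LOOPS times to prime the instruction cache, and only trusted (and zeroed the TSC on) the final pass.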
@@ -437,20 +287,12 @@ static void __cpuinit smp_callin(void)
         /*
          * Save our processor parameters
          */
         smp_store_cpu_info(cpuid);
-
-        disable_APIC_timer();
 
         /*
          * Allow the master to continue.
          */
         cpu_set(cpuid, cpu_callin_map);
-
-        /*
-         * Synchronize the TSC with the BP
-         */
-        if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
-                synchronize_tsc_ap();
 }
 
 static int cpucount;
@@ -545,18 +387,25 @@ static void __cpuinit start_secondary(void *unused)
          * booting is too fragile that we want to limit the
          * things done here to the most necessary things.
          */
+#ifdef CONFIG_VMI
+        vmi_bringup();
+#endif
         secondary_cpu_init();
         preempt_disable();
         smp_callin();
         while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
                 rep_nop();
-        setup_secondary_APIC_clock();
+        /*
+         * Check TSC synchronization with the BP:
+         */
+        check_tsc_sync_target();
+
+        setup_secondary_clock();
         if (nmi_watchdog == NMI_IO_APIC) {
                 disable_8259A_irq(0);
                 enable_NMI_through_LVT0(NULL);
                 enable_8259A_irq(0);
         }
-        enable_APIC_timer();
         /*
          * low-memory mappings have been cleared, flush them from
          * the local TLBs too.
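The new check_tsc_sync_target() call on the AP pairs with the check_tsc_sync_source(cpu) call added to __cpu_up() in the final hunk: instead of forcibly rewriting TSCs, the two CPUs now only measure whether time can be observed running backwards between them. A user-space sketch of that warp test follows, assuming an x86 compiler with __rdtsc(); the kernel's actual tsc_sync implementation differs in detail (it protects last_value with a lock rather than a CAS loop).

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <x86intrin.h>          /* __rdtsc(); assumes an x86 target */

static atomic_ullong last_value;
static atomic_int warps;

static void *warp_check(void *arg)
{
        int i;

        (void)arg;
        for (i = 0; i < 1000000; i++) {
                /* read the newest timestamp anyone has published ... */
                unsigned long long prev = atomic_load(&last_value);
                /* ... then take our own; it must not be older */
                unsigned long long now = __rdtsc();

                if (now < prev)
                        atomic_fetch_add(&warps, 1);    /* time ran backwards */

                /* publish our timestamp if it is the newest so far */
                while (prev < now &&
                       !atomic_compare_exchange_weak(&last_value, &prev, now))
                        ;
        }
        return NULL;
}

int main(void)
{
        pthread_t a, b;

        pthread_create(&a, NULL, warp_check, NULL);
        pthread_create(&b, NULL, warp_check, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        printf("observed %d TSC warps\n", atomic_load(&warps));
        return 0;
}

On a machine with synchronized TSCs the warp count stays zero; any nonzero count means the counters are unusable as a global clock, which is exactly the verdict the new kernel check reports.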
@@ -619,7 +468,6 @@ extern struct {
         unsigned short ss;
 } stack_start;
 extern struct i386_pda *start_pda;
-extern struct Xgt_desc_struct cpu_gdt_descr;
 
 #ifdef CONFIG_NUMA
 
@@ -749,7 +597,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
         /*
          * Due to the Pentium erratum 3AP.
          */
-        maxlvt = get_maxlvt();
+        maxlvt = lapic_get_maxlvt();
         if (maxlvt > 3) {
                 apic_read_around(APIC_SPIV);
                 apic_write(APIC_ESR, 0);
@@ -835,11 +683,18 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
         num_starts = 0;
 
         /*
+         * Paravirt / VMI wants a startup IPI hook here to set up the
+         * target processor state.
+         */
+        startup_ipi_hook(phys_apicid, (unsigned long) start_secondary,
+                         (unsigned long) stack_start.esp);
+
+        /*
          * Run STARTUP IPI loop.
          */
         Dprintk("#startup loops: %d.\n", num_starts);
 
-        maxlvt = get_maxlvt();
+        maxlvt = lapic_get_maxlvt();
 
         for (j = 1; j <= num_starts; j++) {
                 Dprintk("Sending STARTUP #%d.\n",j);
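startup_ipi_hook() is the paravirt entry point this hunk adds: on native hardware it is expected to do nothing, while a VMI backend can use it to hand the hypervisor the AP's entry point and initial stack before the STARTUP IPIs go out. A hypothetical stand-alone rendering of that function-pointer pattern (vmi_startup_ipi_hook and the hypercall printout are invented for illustration, not VMI's real interface):

#include <stdio.h>

/* the hook a backend may override; native leaves it a no-op */
static void (*startup_ipi_hook)(int apicid, unsigned long eip,
                                unsigned long esp);

static void native_startup_ipi_hook(int apicid, unsigned long eip,
                                    unsigned long esp)
{
        /* native boot: the STARTUP IPI sequence does all the work */
        (void)apicid; (void)eip; (void)esp;
}

static void vmi_startup_ipi_hook(int apicid, unsigned long eip,
                                 unsigned long esp)
{
        /* paravirt boot: tell the hypervisor where the AP should start */
        printf("hypercall: start AP %d at eip=%#lx esp=%#lx\n",
               apicid, eip, esp);
}

int main(void)
{
        int vmi_detected = 1;   /* pretend VMI was probed at boot */

        /* platform setup picks the backend once, before any AP boots */
        startup_ipi_hook = vmi_detected ? vmi_startup_ipi_hook
                                        : native_startup_ipi_hook;

        startup_ipi_hook(1, 0x9000UL, 0x8000UL);
        return 0;
}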
@@ -1115,8 +970,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
         info.cpu = cpu;
         INIT_WORK(&info.task, do_warm_boot_cpu);
 
-        tsc_sync_disabled = 1;
-
         /* init low mem mapping */
         clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
                         min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
@@ -1124,7 +977,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
         schedule_work(&info.task);
         wait_for_completion(&done);
 
-        tsc_sync_disabled = 0;
         zap_low_mappings();
         ret = 0;
 exit:
@@ -1320,13 +1172,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 
         smpboot_setup_io_apic();
 
-        setup_boot_APIC_clock();
-
-        /*
-         * Synchronize the TSC with the AP
-         */
-        if (cpu_has_tsc && cpucount && cpu_khz)
-                synchronize_tsc_bp();
+        setup_boot_clock();
 }
 
 /* These are wrappers to interface to the new boot process. Someone
@@ -1461,9 +1307,16 @@ int __cpuinit __cpu_up(unsigned int cpu)
         }
 
         local_irq_enable();
+
         per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
         /* Unleash the CPU! */
         cpu_set(cpu, smp_commenced_mask);
+
+        /*
+         * Check TSC synchronization with the AP:
+         */
+        check_tsc_sync_source(cpu);
+
         while (!cpu_isset(cpu, cpu_online_map))
                 cpu_relax();
 
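Note the ordering in this final hunk: the BP releases the AP through smp_commenced_mask before calling check_tsc_sync_source(cpu), so the freshly started CPU can leave its rep_nop() loop and reach the matching check_tsc_sync_target() rendezvous in start_secondary(); only after the measurement does the BP wait for the AP to appear in cpu_online_map.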