author     Jeff Garzik <jeff@garzik.org>    2007-02-17 15:11:43 -0500
committer  Jeff Garzik <jeff@garzik.org>    2007-02-17 15:11:43 -0500
commit     f630fe2817601314b2eb7ca5ddc23c7834646731 (patch)
tree       3bfb4939b7bbc3859575ca8b58fa3f929b015941 /arch/i386/kernel/smpboot.c
parent     48c871c1f6a7c7044dd76774fb469e65c7e2e4e8 (diff)
parent     8a03d9a498eaf02c8a118752050a5154852c13bf (diff)
Merge branch 'master' into upstream
Diffstat (limited to 'arch/i386/kernel/smpboot.c')
-rw-r--r--  arch/i386/kernel/smpboot.c | 203 ++++--------------------
1 file changed, 28 insertions(+), 175 deletions(-)
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 8c6c8c52b95c..48bfcaa13ecc 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -63,6 +63,7 @@
 #include <mach_apic.h>
 #include <mach_wakecpu.h>
 #include <smpboot_hooks.h>
+#include <asm/vmi.h>
 
 /* Set if we find a B stepping CPU */
 static int __devinitdata smp_b_stepping;
@@ -93,12 +94,6 @@ cpumask_t cpu_possible_map;
 EXPORT_SYMBOL(cpu_possible_map);
 static cpumask_t smp_commenced_mask;
 
-/* The TSC's upper 32 bits can't be written on earlier CPUs (before Prescott),
- * so there is no way to resync one AP against the BP. TBD: for Prescott and
- * above, we should use IA64's algorithm.
- */
-static int __devinitdata tsc_sync_disabled;
-
 /* Per CPU bogomips and other parameters */
 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
 EXPORT_SYMBOL(cpu_data);
@@ -215,151 +210,6 @@ valid_k7:
        ;
 }
 
-/*
- * TSC synchronization.
- *
- * We first check whether all CPUs have their TSC's synchronized,
- * then we print a warning if not, and always resync.
- */
-
-static struct {
-       atomic_t start_flag;
-       atomic_t count_start;
-       atomic_t count_stop;
-       unsigned long long values[NR_CPUS];
-} tsc __cpuinitdata = {
-       .start_flag = ATOMIC_INIT(0),
-       .count_start = ATOMIC_INIT(0),
-       .count_stop = ATOMIC_INIT(0),
-};
-
-#define NR_LOOPS 5
-
-static void __init synchronize_tsc_bp(void)
-{
-       int i;
-       unsigned long long t0;
-       unsigned long long sum, avg;
-       long long delta;
-       unsigned int one_usec;
-       int buggy = 0;
-
-       printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
-
-       /* convert from kcyc/sec to cyc/usec */
-       one_usec = cpu_khz / 1000;
-
-       atomic_set(&tsc.start_flag, 1);
-       wmb();
-
-       /*
-        * We loop a few times to get a primed instruction cache,
-        * then the last pass is more or less synchronized and
-        * the BP and APs set their cycle counters to zero all at
-        * once. This reduces the chance of having random offsets
-        * between the processors, and guarantees that the maximum
-        * delay between the cycle counters is never bigger than
-        * the latency of information-passing (cachelines) between
-        * two CPUs.
-        */
-       for (i = 0; i < NR_LOOPS; i++) {
-               /*
-                * all APs synchronize but they loop on '== num_cpus'
-                */
-               while (atomic_read(&tsc.count_start) != num_booting_cpus()-1)
-                       cpu_relax();
-               atomic_set(&tsc.count_stop, 0);
-               wmb();
-               /*
-                * this lets the APs save their current TSC:
-                */
-               atomic_inc(&tsc.count_start);
-
-               rdtscll(tsc.values[smp_processor_id()]);
-               /*
-                * We clear the TSC in the last loop:
-                */
-               if (i == NR_LOOPS-1)
-                       write_tsc(0, 0);
-
-               /*
-                * Wait for all APs to leave the synchronization point:
-                */
-               while (atomic_read(&tsc.count_stop) != num_booting_cpus()-1)
-                       cpu_relax();
-               atomic_set(&tsc.count_start, 0);
-               wmb();
-               atomic_inc(&tsc.count_stop);
-       }
-
-       sum = 0;
-       for (i = 0; i < NR_CPUS; i++) {
-               if (cpu_isset(i, cpu_callout_map)) {
-                       t0 = tsc.values[i];
-                       sum += t0;
-               }
-       }
-       avg = sum;
-       do_div(avg, num_booting_cpus());
-
-       for (i = 0; i < NR_CPUS; i++) {
-               if (!cpu_isset(i, cpu_callout_map))
-                       continue;
-               delta = tsc.values[i] - avg;
-               if (delta < 0)
-                       delta = -delta;
-               /*
-                * We report clock differences bigger than 2 microseconds.
-                */
-               if (delta > 2*one_usec) {
-                       long long realdelta;
-
-                       if (!buggy) {
-                               buggy = 1;
-                               printk("\n");
-                       }
-                       realdelta = delta;
-                       do_div(realdelta, one_usec);
-                       if (tsc.values[i] < avg)
-                               realdelta = -realdelta;
-
-                       if (realdelta)
-                               printk(KERN_INFO "CPU#%d had %Ld usecs TSC "
-                                       "skew, fixed it up.\n", i, realdelta);
-               }
-       }
-       if (!buggy)
-               printk("passed.\n");
-}
-
-static void __cpuinit synchronize_tsc_ap(void)
-{
-       int i;
-
-       /*
-        * Not every cpu is online at the time
-        * this gets called, so we first wait for the BP to
-        * finish SMP initialization:
-        */
-       while (!atomic_read(&tsc.start_flag))
-               cpu_relax();
-
-       for (i = 0; i < NR_LOOPS; i++) {
-               atomic_inc(&tsc.count_start);
-               while (atomic_read(&tsc.count_start) != num_booting_cpus())
-                       cpu_relax();
-
-               rdtscll(tsc.values[smp_processor_id()]);
-               if (i == NR_LOOPS-1)
-                       write_tsc(0, 0);
-
-               atomic_inc(&tsc.count_stop);
-               while (atomic_read(&tsc.count_stop) != num_booting_cpus())
-                       cpu_relax();
-       }
-}
-#undef NR_LOOPS
-
 extern void calibrate_delay(void);
 
 static atomic_t init_deasserted;
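
The synchronize_tsc_bp()/synchronize_tsc_ap() pair deleted above is a two-counter rendezvous: every CPU increments count_start and spins until all CPUs have arrived, samples its TSC as close to simultaneously as cacheline latency allows, then rendezvouses again on count_stop so nobody runs ahead into the next loop iteration. A minimal user-space sketch of the same pattern, with clock_gettime() standing in for rdtscll() and cumulative counters replacing the in-kernel counter-reset dance (an illustration, not kernel code):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <time.h>

#define NTHREADS 4
#define NR_LOOPS 5

static atomic_int count_start, count_stop;
static long long values[NTHREADS];

static long long now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

static void *worker(void *arg)
{
        int id = (int)(long)arg;
        int i;

        for (i = 0; i < NR_LOOPS; i++) {
                /* first barrier: wait until every thread has arrived */
                atomic_fetch_add(&count_start, 1);
                while (atomic_load(&count_start) < NTHREADS * (i + 1))
                        ;
                values[id] = now_ns();  /* the "rdtscll()" moment */

                /* second barrier: wait until every thread has sampled */
                atomic_fetch_add(&count_stop, 1);
                while (atomic_load(&count_stop) < NTHREADS * (i + 1))
                        ;
        }
        return NULL;
}

int main(void)
{
        pthread_t t[NTHREADS];
        long long min, max;
        long i;

        for (i = 0; i < NTHREADS; i++)
                pthread_create(&t[i], NULL, worker, (void *)i);
        for (i = 0; i < NTHREADS; i++)
                pthread_join(t[i], NULL);

        /* report the spread of the final samples, as the BP side did */
        min = max = values[0];
        for (i = 1; i < NTHREADS; i++) {
                if (values[i] < min)
                        min = values[i];
                if (values[i] > max)
                        max = values[i];
        }
        printf("max sample spread: %lld ns\n", max - min);
        return 0;
}

(build with: cc -O2 -pthread sketch.c)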
@@ -437,20 +287,12 @@ static void __cpuinit smp_callin(void)
        /*
         * Save our processor parameters
         */
        smp_store_cpu_info(cpuid);
-
-       disable_APIC_timer();
 
        /*
         * Allow the master to continue.
         */
        cpu_set(cpuid, cpu_callin_map);
-
-       /*
-        * Synchronize the TSC with the BP
-        */
-       if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
-               synchronize_tsc_ap();
 }
 
 static int cpucount;
@@ -545,18 +387,25 @@ static void __cpuinit start_secondary(void *unused)
         * booting is too fragile that we want to limit the
         * things done here to the most necessary things.
         */
+#ifdef CONFIG_VMI
+       vmi_bringup();
+#endif
        secondary_cpu_init();
        preempt_disable();
        smp_callin();
        while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
                rep_nop();
-       setup_secondary_APIC_clock();
+       /*
+        * Check TSC synchronization with the BP:
+        */
+       check_tsc_sync_target();
+
+       setup_secondary_clock();
        if (nmi_watchdog == NMI_IO_APIC) {
                disable_8259A_irq(0);
                enable_NMI_through_LVT0(NULL);
                enable_8259A_irq(0);
        }
-       enable_APIC_timer();
        /*
         * low-memory mappings have been cleared, flush them from
         * the local TLBs too.
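
check_tsc_sync_target() is the AP half of the generic TSC-sync check this merge switches to; its boot-CPU counterpart, check_tsc_sync_source(), shows up in the __cpu_up() hunk further down. Unlike the deleted code, the new check never writes the TSC: the two CPUs take turns publishing their latest TSC read through a shared, lock-protected variable and watch for that value running backwards. A user-space sketch of the warp test (the mechanism is an assumption about the tsc_sync code of this era, not quoted from this diff):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <x86intrin.h>          /* __rdtsc(), GCC/Clang on x86 */

#define LOOPS 1000000

static atomic_flag lock = ATOMIC_FLAG_INIT;
static unsigned long long last_tsc;
static unsigned long long max_warp;

static void *warp_check(void *unused)
{
        int i;

        (void)unused;
        for (i = 0; i < LOOPS; i++) {
                while (atomic_flag_test_and_set(&lock))
                        ;                               /* spinlock */
                {
                        unsigned long long prev = last_tsc;
                        unsigned long long now = __rdtsc();

                        last_tsc = now;
                        if (prev > now && prev - now > max_warp)
                                max_warp = prev - now;  /* time warped */
                }
                atomic_flag_clear(&lock);
        }
        return NULL;
}

int main(void)
{
        pthread_t a, b;

        pthread_create(&a, NULL, warp_check, NULL);
        pthread_create(&b, NULL, warp_check, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);

        if (max_warp)
                printf("TSC warp observed: %llu cycles\n", max_warp);
        else
                printf("no TSC warp observed\n");
        return 0;
}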
@@ -619,7 +468,6 @@ extern struct {
        unsigned short ss;
 } stack_start;
 extern struct i386_pda *start_pda;
-extern struct Xgt_desc_struct cpu_gdt_descr;
 
 #ifdef CONFIG_NUMA
 
@@ -749,7 +597,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
        /*
         * Due to the Pentium erratum 3AP.
         */
-       maxlvt = get_maxlvt();
+       maxlvt = lapic_get_maxlvt();
        if (maxlvt > 3) {
                apic_read_around(APIC_SPIV);
                apic_write(APIC_ESR, 0);
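
get_maxlvt() becomes lapic_get_maxlvt() via the merged branch. The computation behind it is plain bit extraction from the local APIC version register, whose bits 23:16 encode the index of the highest implemented local vector table (LVT) entry. A self-contained sketch (the helper name here is illustrative, not taken from this diff):

/* bits 23:16 of the local APIC version register = Max LVT entry */
static inline unsigned int apic_version_to_maxlvt(unsigned int lvr)
{
        return (lvr >> 16) & 0xff;
}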
@@ -835,11 +683,18 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
        num_starts = 0;
 
        /*
+        * Paravirt / VMI wants a startup IPI hook here to set up the
+        * target processor state.
+        */
+       startup_ipi_hook(phys_apicid, (unsigned long) start_secondary,
+                        (unsigned long) stack_start.esp);
+
+       /*
         * Run STARTUP IPI loop.
         */
        Dprintk("#startup loops: %d.\n", num_starts);
 
-       maxlvt = get_maxlvt();
+       maxlvt = lapic_get_maxlvt();
 
        for (j = 1; j <= num_starts; j++) {
                Dprintk("Sending STARTUP #%d.\n",j);
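
startup_ipi_hook() gives a paravirt backend (VMI here) a chance to load the target CPU's initial state (entry point and stack) directly, instead of relying solely on the real-mode trampoline kicked off by the INIT/STARTUP IPI sequence. On native hardware the hook is expected to compile away to nothing; a sketch of that pattern (the native definition below is an assumption, not quoted from this merge):

/* native case: the STARTUP IPI path does all the work, hook is a no-op */
#ifndef CONFIG_PARAVIRT
#define startup_ipi_hook(phys_apicid, start_eip, start_esp) \
        do { } while (0)
#endif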
@@ -1115,8 +970,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
        info.cpu = cpu;
        INIT_WORK(&info.task, do_warm_boot_cpu);
 
-       tsc_sync_disabled = 1;
-
        /* init low mem mapping */
        clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
                        min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
@@ -1124,7 +977,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
        schedule_work(&info.task);
        wait_for_completion(&done);
 
-       tsc_sync_disabled = 0;
        zap_low_mappings();
        ret = 0;
 exit:
@@ -1320,13 +1172,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
 
        smpboot_setup_io_apic();
 
-       setup_boot_APIC_clock();
-
-       /*
-        * Synchronize the TSC with the AP
-        */
-       if (cpu_has_tsc && cpucount && cpu_khz)
-               synchronize_tsc_bp();
+       setup_boot_clock();
 }
 
 /* These are wrappers to interface to the new boot process. Someone
@@ -1461,9 +1307,16 @@ int __cpuinit __cpu_up(unsigned int cpu)
        }
 
        local_irq_enable();
+
        per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
        /* Unleash the CPU! */
        cpu_set(cpu, smp_commenced_mask);
+
+       /*
+        * Check TSC synchronization with the AP:
+        */
+       check_tsc_sync_source(cpu);
+
        while (!cpu_isset(cpu, cpu_online_map))
                cpu_relax();
 
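
Note the ordering: check_tsc_sync_source() can only run after the AP has been released via smp_commenced_mask, because the target half executes early in start_secondary() (see the earlier hunk). Condensed from the hunks in this diff, the two sides pair up like this (a skeleton, not compilable on its own):

/* BP side, in __cpu_up() */
cpu_set(cpu, smp_commenced_mask);       /* release the AP */
check_tsc_sync_source(cpu);             /* rendezvous with the AP's check */
while (!cpu_isset(cpu, cpu_online_map))
        cpu_relax();

/* AP side, in start_secondary() */
while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
        rep_nop();
check_tsc_sync_target();                /* rendezvous with the BP's check */
/* ... finish bring-up, eventually set cpu_online_map ... */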