diff options
author | Ingo Molnar <mingo@elte.hu> | 2007-02-16 04:27:34 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-02-16 11:13:57 -0500 |
commit | 95492e4646e5de8b43d9a7908d6177fb737b61f0 (patch) | |
tree | ae25cd206ca76f78d50ac2a206ef012e0ab1d9df /arch/i386/kernel | |
parent | 92c7e00254b2d0efc1e36ac3e45474ce1871b6b2 (diff) |
[PATCH] x86: rewrite SMP TSC sync code
make the TSC synchronization code more robust, and unify it between x86_64 and
i386.
The biggest change is the removal of the 'fix up TSCs' code on x86_64 and
i386, in some rare cases it was /causing/ time-warps on SMP systems.
The new code only checks for TSC asynchronity - and if it can prove a
time-warp (if it can observe the TSC going backwards when going from one CPU
to another within a critical section), then the TSC clock-source is turned
off.
The TSC synchronization-checking code also got moved into a separate file.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'arch/i386/kernel')
-rw-r--r-- | arch/i386/kernel/Makefile | 2 | ||||
-rw-r--r-- | arch/i386/kernel/smpboot.c | 178 | ||||
-rw-r--r-- | arch/i386/kernel/tsc.c | 4 | ||||
-rw-r--r-- | arch/i386/kernel/tsc_sync.c | 1 |
4 files changed, 17 insertions, 168 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index cbe4e601885c..c2b3b79dc436 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile | |||
@@ -18,7 +18,7 @@ obj-$(CONFIG_X86_MSR) += msr.o | |||
18 | obj-$(CONFIG_X86_CPUID) += cpuid.o | 18 | obj-$(CONFIG_X86_CPUID) += cpuid.o |
19 | obj-$(CONFIG_MICROCODE) += microcode.o | 19 | obj-$(CONFIG_MICROCODE) += microcode.o |
20 | obj-$(CONFIG_APM) += apm.o | 20 | obj-$(CONFIG_APM) += apm.o |
21 | obj-$(CONFIG_X86_SMP) += smp.o smpboot.o | 21 | obj-$(CONFIG_X86_SMP) += smp.o smpboot.o tsc_sync.o |
22 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o | 22 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o |
23 | obj-$(CONFIG_X86_MPPARSE) += mpparse.o | 23 | obj-$(CONFIG_X86_MPPARSE) += mpparse.o |
24 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o | 24 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o |
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index f46a4d095e6c..6ddffe8aabb2 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c | |||
@@ -94,12 +94,6 @@ cpumask_t cpu_possible_map; | |||
94 | EXPORT_SYMBOL(cpu_possible_map); | 94 | EXPORT_SYMBOL(cpu_possible_map); |
95 | static cpumask_t smp_commenced_mask; | 95 | static cpumask_t smp_commenced_mask; |
96 | 96 | ||
97 | /* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there | ||
98 | * is no way to resync one AP against BP. TBD: for prescott and above, we | ||
99 | * should use IA64's algorithm | ||
100 | */ | ||
101 | static int __devinitdata tsc_sync_disabled; | ||
102 | |||
103 | /* Per CPU bogomips and other parameters */ | 97 | /* Per CPU bogomips and other parameters */ |
104 | struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; | 98 | struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; |
105 | EXPORT_SYMBOL(cpu_data); | 99 | EXPORT_SYMBOL(cpu_data); |
@@ -216,151 +210,6 @@ valid_k7: | |||
216 | ; | 210 | ; |
217 | } | 211 | } |
218 | 212 | ||
219 | /* | ||
220 | * TSC synchronization. | ||
221 | * | ||
222 | * We first check whether all CPUs have their TSC's synchronized, | ||
223 | * then we print a warning if not, and always resync. | ||
224 | */ | ||
225 | |||
226 | static struct { | ||
227 | atomic_t start_flag; | ||
228 | atomic_t count_start; | ||
229 | atomic_t count_stop; | ||
230 | unsigned long long values[NR_CPUS]; | ||
231 | } tsc __cpuinitdata = { | ||
232 | .start_flag = ATOMIC_INIT(0), | ||
233 | .count_start = ATOMIC_INIT(0), | ||
234 | .count_stop = ATOMIC_INIT(0), | ||
235 | }; | ||
236 | |||
237 | #define NR_LOOPS 5 | ||
238 | |||
239 | static void __init synchronize_tsc_bp(void) | ||
240 | { | ||
241 | int i; | ||
242 | unsigned long long t0; | ||
243 | unsigned long long sum, avg; | ||
244 | long long delta; | ||
245 | unsigned int one_usec; | ||
246 | int buggy = 0; | ||
247 | |||
248 | printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus()); | ||
249 | |||
250 | /* convert from kcyc/sec to cyc/usec */ | ||
251 | one_usec = cpu_khz / 1000; | ||
252 | |||
253 | atomic_set(&tsc.start_flag, 1); | ||
254 | wmb(); | ||
255 | |||
256 | /* | ||
257 | * We loop a few times to get a primed instruction cache, | ||
258 | * then the last pass is more or less synchronized and | ||
259 | * the BP and APs set their cycle counters to zero all at | ||
260 | * once. This reduces the chance of having random offsets | ||
261 | * between the processors, and guarantees that the maximum | ||
262 | * delay between the cycle counters is never bigger than | ||
263 | * the latency of information-passing (cachelines) between | ||
264 | * two CPUs. | ||
265 | */ | ||
266 | for (i = 0; i < NR_LOOPS; i++) { | ||
267 | /* | ||
268 | * all APs synchronize but they loop on '== num_cpus' | ||
269 | */ | ||
270 | while (atomic_read(&tsc.count_start) != num_booting_cpus()-1) | ||
271 | cpu_relax(); | ||
272 | atomic_set(&tsc.count_stop, 0); | ||
273 | wmb(); | ||
274 | /* | ||
275 | * this lets the APs save their current TSC: | ||
276 | */ | ||
277 | atomic_inc(&tsc.count_start); | ||
278 | |||
279 | rdtscll(tsc.values[smp_processor_id()]); | ||
280 | /* | ||
281 | * We clear the TSC in the last loop: | ||
282 | */ | ||
283 | if (i == NR_LOOPS-1) | ||
284 | write_tsc(0, 0); | ||
285 | |||
286 | /* | ||
287 | * Wait for all APs to leave the synchronization point: | ||
288 | */ | ||
289 | while (atomic_read(&tsc.count_stop) != num_booting_cpus()-1) | ||
290 | cpu_relax(); | ||
291 | atomic_set(&tsc.count_start, 0); | ||
292 | wmb(); | ||
293 | atomic_inc(&tsc.count_stop); | ||
294 | } | ||
295 | |||
296 | sum = 0; | ||
297 | for (i = 0; i < NR_CPUS; i++) { | ||
298 | if (cpu_isset(i, cpu_callout_map)) { | ||
299 | t0 = tsc.values[i]; | ||
300 | sum += t0; | ||
301 | } | ||
302 | } | ||
303 | avg = sum; | ||
304 | do_div(avg, num_booting_cpus()); | ||
305 | |||
306 | for (i = 0; i < NR_CPUS; i++) { | ||
307 | if (!cpu_isset(i, cpu_callout_map)) | ||
308 | continue; | ||
309 | delta = tsc.values[i] - avg; | ||
310 | if (delta < 0) | ||
311 | delta = -delta; | ||
312 | /* | ||
313 | * We report bigger than 2 microseconds clock differences. | ||
314 | */ | ||
315 | if (delta > 2*one_usec) { | ||
316 | long long realdelta; | ||
317 | |||
318 | if (!buggy) { | ||
319 | buggy = 1; | ||
320 | printk("\n"); | ||
321 | } | ||
322 | realdelta = delta; | ||
323 | do_div(realdelta, one_usec); | ||
324 | if (tsc.values[i] < avg) | ||
325 | realdelta = -realdelta; | ||
326 | |||
327 | if (realdelta) | ||
328 | printk(KERN_INFO "CPU#%d had %Ld usecs TSC " | ||
329 | "skew, fixed it up.\n", i, realdelta); | ||
330 | } | ||
331 | } | ||
332 | if (!buggy) | ||
333 | printk("passed.\n"); | ||
334 | } | ||
335 | |||
336 | static void __cpuinit synchronize_tsc_ap(void) | ||
337 | { | ||
338 | int i; | ||
339 | |||
340 | /* | ||
341 | * Not every cpu is online at the time | ||
342 | * this gets called, so we first wait for the BP to | ||
343 | * finish SMP initialization: | ||
344 | */ | ||
345 | while (!atomic_read(&tsc.start_flag)) | ||
346 | cpu_relax(); | ||
347 | |||
348 | for (i = 0; i < NR_LOOPS; i++) { | ||
349 | atomic_inc(&tsc.count_start); | ||
350 | while (atomic_read(&tsc.count_start) != num_booting_cpus()) | ||
351 | cpu_relax(); | ||
352 | |||
353 | rdtscll(tsc.values[smp_processor_id()]); | ||
354 | if (i == NR_LOOPS-1) | ||
355 | write_tsc(0, 0); | ||
356 | |||
357 | atomic_inc(&tsc.count_stop); | ||
358 | while (atomic_read(&tsc.count_stop) != num_booting_cpus()) | ||
359 | cpu_relax(); | ||
360 | } | ||
361 | } | ||
362 | #undef NR_LOOPS | ||
363 | |||
364 | extern void calibrate_delay(void); | 213 | extern void calibrate_delay(void); |
365 | 214 | ||
366 | static atomic_t init_deasserted; | 215 | static atomic_t init_deasserted; |
@@ -446,12 +295,6 @@ static void __cpuinit smp_callin(void) | |||
446 | * Allow the master to continue. | 295 | * Allow the master to continue. |
447 | */ | 296 | */ |
448 | cpu_set(cpuid, cpu_callin_map); | 297 | cpu_set(cpuid, cpu_callin_map); |
449 | |||
450 | /* | ||
451 | * Synchronize the TSC with the BP | ||
452 | */ | ||
453 | if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled) | ||
454 | synchronize_tsc_ap(); | ||
455 | } | 298 | } |
456 | 299 | ||
457 | static int cpucount; | 300 | static int cpucount; |
@@ -554,6 +397,11 @@ static void __cpuinit start_secondary(void *unused) | |||
554 | smp_callin(); | 397 | smp_callin(); |
555 | while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) | 398 | while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) |
556 | rep_nop(); | 399 | rep_nop(); |
400 | /* | ||
401 | * Check TSC synchronization with the BP: | ||
402 | */ | ||
403 | check_tsc_sync_target(); | ||
404 | |||
557 | setup_secondary_clock(); | 405 | setup_secondary_clock(); |
558 | if (nmi_watchdog == NMI_IO_APIC) { | 406 | if (nmi_watchdog == NMI_IO_APIC) { |
559 | disable_8259A_irq(0); | 407 | disable_8259A_irq(0); |
@@ -1125,8 +973,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu) | |||
1125 | info.cpu = cpu; | 973 | info.cpu = cpu; |
1126 | INIT_WORK(&info.task, do_warm_boot_cpu); | 974 | INIT_WORK(&info.task, do_warm_boot_cpu); |
1127 | 975 | ||
1128 | tsc_sync_disabled = 1; | ||
1129 | |||
1130 | /* init low mem mapping */ | 976 | /* init low mem mapping */ |
1131 | clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, | 977 | clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, |
1132 | min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); | 978 | min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); |
@@ -1134,7 +980,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu) | |||
1134 | schedule_work(&info.task); | 980 | schedule_work(&info.task); |
1135 | wait_for_completion(&done); | 981 | wait_for_completion(&done); |
1136 | 982 | ||
1137 | tsc_sync_disabled = 0; | ||
1138 | zap_low_mappings(); | 983 | zap_low_mappings(); |
1139 | ret = 0; | 984 | ret = 0; |
1140 | exit: | 985 | exit: |
@@ -1331,12 +1176,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus) | |||
1331 | smpboot_setup_io_apic(); | 1176 | smpboot_setup_io_apic(); |
1332 | 1177 | ||
1333 | setup_boot_clock(); | 1178 | setup_boot_clock(); |
1334 | |||
1335 | /* | ||
1336 | * Synchronize the TSC with the AP | ||
1337 | */ | ||
1338 | if (cpu_has_tsc && cpucount && cpu_khz) | ||
1339 | synchronize_tsc_bp(); | ||
1340 | } | 1179 | } |
1341 | 1180 | ||
1342 | /* These are wrappers to interface to the new boot process. Someone | 1181 | /* These are wrappers to interface to the new boot process. Someone |
@@ -1471,9 +1310,16 @@ int __cpuinit __cpu_up(unsigned int cpu) | |||
1471 | } | 1310 | } |
1472 | 1311 | ||
1473 | local_irq_enable(); | 1312 | local_irq_enable(); |
1313 | |||
1474 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; | 1314 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; |
1475 | /* Unleash the CPU! */ | 1315 | /* Unleash the CPU! */ |
1476 | cpu_set(cpu, smp_commenced_mask); | 1316 | cpu_set(cpu, smp_commenced_mask); |
1317 | |||
1318 | /* | ||
1319 | * Check TSC synchronization with the AP: | ||
1320 | */ | ||
1321 | check_tsc_sync_source(cpu); | ||
1322 | |||
1477 | while (!cpu_isset(cpu, cpu_online_map)) | 1323 | while (!cpu_isset(cpu, cpu_online_map)) |
1478 | cpu_relax(); | 1324 | cpu_relax(); |
1479 | 1325 | ||
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c index 6f6971da761c..0fd93107ff9a 100644 --- a/arch/i386/kernel/tsc.c +++ b/arch/i386/kernel/tsc.c | |||
@@ -406,8 +406,10 @@ out: | |||
406 | * Make an educated guess if the TSC is trustworthy and synchronized | 406 | * Make an educated guess if the TSC is trustworthy and synchronized |
407 | * over all CPUs. | 407 | * over all CPUs. |
408 | */ | 408 | */ |
409 | static __init int unsynchronized_tsc(void) | 409 | __cpuinit int unsynchronized_tsc(void) |
410 | { | 410 | { |
411 | if (!cpu_has_tsc || tsc_unstable) | ||
412 | return 1; | ||
411 | /* | 413 | /* |
412 | * Intel systems are normally all synchronized. | 414 | * Intel systems are normally all synchronized. |
413 | * Exceptions must mark TSC as unstable: | 415 | * Exceptions must mark TSC as unstable: |
diff --git a/arch/i386/kernel/tsc_sync.c b/arch/i386/kernel/tsc_sync.c new file mode 100644 index 000000000000..12424629af87 --- /dev/null +++ b/arch/i386/kernel/tsc_sync.c | |||
@@ -0,0 +1 @@ | |||
#include "../../x86_64/kernel/tsc_sync.c" | |||