path: root/arch/i386/kernel/smpboot.c
author     Ingo Molnar <mingo@elte.hu>                            2007-02-16 04:27:34 -0500
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>   2007-02-16 11:13:57 -0500
commit     95492e4646e5de8b43d9a7908d6177fb737b61f0 (patch)
tree       ae25cd206ca76f78d50ac2a206ef012e0ab1d9df /arch/i386/kernel/smpboot.c
parent     92c7e00254b2d0efc1e36ac3e45474ce1871b6b2 (diff)
[PATCH] x86: rewrite SMP TSC sync code
Make the TSC synchronization code more robust, and unify it between x86_64 and i386.

The biggest change is the removal of the 'fix up TSCs' code on x86_64 and i386: in some rare cases it was /causing/ time-warps on SMP systems.

The new code only checks for TSC asynchronicity: if it can prove a time-warp (i.e. it can observe the TSC going backwards when passing from one CPU to another within a critical section), then the TSC clock-source is turned off.

The TSC synchronization-checking code also got moved into a separate file.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
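For orientation, the essence of the warp check described above is roughly the following. This is a minimal sketch, not the actual tsc_sync.c file added by this patch (which this diff does not show); the names warp_lock, last_tsc and nr_warps and the fixed iteration count are assumptions. Both the boot CPU and the CPU being brought up run the loop at the same time; because each TSC read happens inside the lock, a value smaller than the previously recorded one proves the TSC went backwards across CPUs:

#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/timex.h>
#include <asm/processor.h>

static DEFINE_SPINLOCK(warp_lock);      /* serializes the TSC reads of the two CPUs (assumed name) */
static cycles_t last_tsc;               /* last TSC value recorded by either CPU */
static int nr_warps;                    /* number of backward jumps observed */

static void check_tsc_warp(void)
{
        cycles_t now;
        int i;

        for (i = 0; i < 100000; i++) {  /* fixed iteration count is an assumption */
                /*
                 * Read the TSC inside the lock and compare it with the
                 * value recorded by whichever CPU held the lock last.
                 * The lock provides a happens-before edge, so a smaller
                 * value can only mean the TSCs are out of sync.
                 */
                spin_lock(&warp_lock);
                now = get_cycles();
                if (now < last_tsc)
                        nr_warps++;
                last_tsc = now;
                spin_unlock(&warp_lock);

                cpu_relax();
        }

        if (nr_warps)
                /* the real code disables the TSC clocksource at this point */
                printk(KERN_WARNING "TSC warp observed, disabling TSC clocksource.\n");
}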
Diffstat (limited to 'arch/i386/kernel/smpboot.c')
-rw-r--r--  arch/i386/kernel/smpboot.c  178
1 file changed, 12 insertions(+), 166 deletions(-)
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index f46a4d095e6c..6ddffe8aabb2 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -94,12 +94,6 @@ cpumask_t cpu_possible_map;
 EXPORT_SYMBOL(cpu_possible_map);
 static cpumask_t smp_commenced_mask;
 
-/* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there
- * is no way to resync one AP against BP. TBD: for prescott and above, we
- * should use IA64's algorithm
- */
-static int __devinitdata tsc_sync_disabled;
-
 /* Per CPU bogomips and other parameters */
 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
 EXPORT_SYMBOL(cpu_data);
@@ -216,151 +210,6 @@ valid_k7:
         ;
 }
 
-/*
- * TSC synchronization.
- *
- * We first check whether all CPUs have their TSC's synchronized,
- * then we print a warning if not, and always resync.
- */
-
-static struct {
-        atomic_t start_flag;
-        atomic_t count_start;
-        atomic_t count_stop;
-        unsigned long long values[NR_CPUS];
-} tsc __cpuinitdata = {
-        .start_flag = ATOMIC_INIT(0),
-        .count_start = ATOMIC_INIT(0),
-        .count_stop = ATOMIC_INIT(0),
-};
-
-#define NR_LOOPS 5
-
-static void __init synchronize_tsc_bp(void)
-{
-        int i;
-        unsigned long long t0;
-        unsigned long long sum, avg;
-        long long delta;
-        unsigned int one_usec;
-        int buggy = 0;
-
-        printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus());
-
-        /* convert from kcyc/sec to cyc/usec */
-        one_usec = cpu_khz / 1000;
-
-        atomic_set(&tsc.start_flag, 1);
-        wmb();
-
-        /*
-         * We loop a few times to get a primed instruction cache,
-         * then the last pass is more or less synchronized and
-         * the BP and APs set their cycle counters to zero all at
-         * once. This reduces the chance of having random offsets
-         * between the processors, and guarantees that the maximum
-         * delay between the cycle counters is never bigger than
-         * the latency of information-passing (cachelines) between
-         * two CPUs.
-         */
-        for (i = 0; i < NR_LOOPS; i++) {
-                /*
-                 * all APs synchronize but they loop on '== num_cpus'
-                 */
-                while (atomic_read(&tsc.count_start) != num_booting_cpus()-1)
-                        cpu_relax();
-                atomic_set(&tsc.count_stop, 0);
-                wmb();
-                /*
-                 * this lets the APs save their current TSC:
-                 */
-                atomic_inc(&tsc.count_start);
-
-                rdtscll(tsc.values[smp_processor_id()]);
-                /*
-                 * We clear the TSC in the last loop:
-                 */
-                if (i == NR_LOOPS-1)
-                        write_tsc(0, 0);
-
-                /*
-                 * Wait for all APs to leave the synchronization point:
-                 */
-                while (atomic_read(&tsc.count_stop) != num_booting_cpus()-1)
-                        cpu_relax();
-                atomic_set(&tsc.count_start, 0);
-                wmb();
-                atomic_inc(&tsc.count_stop);
-        }
-
-        sum = 0;
-        for (i = 0; i < NR_CPUS; i++) {
-                if (cpu_isset(i, cpu_callout_map)) {
-                        t0 = tsc.values[i];
-                        sum += t0;
-                }
-        }
-        avg = sum;
-        do_div(avg, num_booting_cpus());
-
-        for (i = 0; i < NR_CPUS; i++) {
-                if (!cpu_isset(i, cpu_callout_map))
-                        continue;
-                delta = tsc.values[i] - avg;
-                if (delta < 0)
-                        delta = -delta;
-                /*
-                 * We report bigger than 2 microseconds clock differences.
-                 */
-                if (delta > 2*one_usec) {
-                        long long realdelta;
-
-                        if (!buggy) {
-                                buggy = 1;
-                                printk("\n");
-                        }
-                        realdelta = delta;
-                        do_div(realdelta, one_usec);
-                        if (tsc.values[i] < avg)
-                                realdelta = -realdelta;
-
-                        if (realdelta)
-                                printk(KERN_INFO "CPU#%d had %Ld usecs TSC "
-                                        "skew, fixed it up.\n", i, realdelta);
-                }
-        }
-        if (!buggy)
-                printk("passed.\n");
-}
-
-static void __cpuinit synchronize_tsc_ap(void)
-{
-        int i;
-
-        /*
-         * Not every cpu is online at the time
-         * this gets called, so we first wait for the BP to
-         * finish SMP initialization:
-         */
-        while (!atomic_read(&tsc.start_flag))
-                cpu_relax();
-
-        for (i = 0; i < NR_LOOPS; i++) {
-                atomic_inc(&tsc.count_start);
-                while (atomic_read(&tsc.count_start) != num_booting_cpus())
-                        cpu_relax();
-
-                rdtscll(tsc.values[smp_processor_id()]);
-                if (i == NR_LOOPS-1)
-                        write_tsc(0, 0);
-
-                atomic_inc(&tsc.count_stop);
-                while (atomic_read(&tsc.count_stop) != num_booting_cpus())
-                        cpu_relax();
-        }
-}
-#undef NR_LOOPS
-
 extern void calibrate_delay(void);
 
 static atomic_t init_deasserted;
@@ -446,12 +295,6 @@ static void __cpuinit smp_callin(void)
          * Allow the master to continue.
          */
         cpu_set(cpuid, cpu_callin_map);
-
-        /*
-         * Synchronize the TSC with the BP
-         */
-        if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled)
-                synchronize_tsc_ap();
 }
 
 static int cpucount;
@@ -554,6 +397,11 @@ static void __cpuinit start_secondary(void *unused)
         smp_callin();
         while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
                 rep_nop();
+        /*
+         * Check TSC synchronization with the BP:
+         */
+        check_tsc_sync_target();
+
         setup_secondary_clock();
         if (nmi_watchdog == NMI_IO_APIC) {
                 disable_8259A_irq(0);
@@ -1125,8 +973,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
         info.cpu = cpu;
         INIT_WORK(&info.task, do_warm_boot_cpu);
 
-        tsc_sync_disabled = 1;
-
         /* init low mem mapping */
         clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
                         min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
@@ -1134,7 +980,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
         schedule_work(&info.task);
         wait_for_completion(&done);
 
-        tsc_sync_disabled = 0;
         zap_low_mappings();
         ret = 0;
 exit:
@@ -1331,12 +1176,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
         smpboot_setup_io_apic();
 
         setup_boot_clock();
-
-        /*
-         * Synchronize the TSC with the AP
-         */
-        if (cpu_has_tsc && cpucount && cpu_khz)
-                synchronize_tsc_bp();
 }
 
 /* These are wrappers to interface to the new boot process. Someone
@@ -1471,9 +1310,16 @@ int __cpuinit __cpu_up(unsigned int cpu)
         }
 
         local_irq_enable();
+
         per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
         /* Unleash the CPU! */
         cpu_set(cpu, smp_commenced_mask);
+
+        /*
+         * Check TSC synchronization with the AP:
+         */
+        check_tsc_sync_source(cpu);
+
         while (!cpu_isset(cpu, cpu_online_map))
                 cpu_relax();
 
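As the hunks above show, the two new entry points pair up across CPUs: the boot CPU calls check_tsc_sync_source(cpu) from __cpu_up() right after releasing the new CPU, while the new CPU calls check_tsc_sync_target() from start_secondary() once it has been released. Below is a hedged sketch of how such a rendezvous can be arranged so that both CPUs run the warp test concurrently; only the two function names and their call sites come from the diff, the atomic counters and the reset note are assumptions:

static atomic_t start_count;    /* assumed rendezvous counters */
static atomic_t stop_count;

void check_tsc_sync_source(int cpu)
{
        /* boot CPU: wait until the new CPU has reached the rendezvous */
        while (atomic_read(&start_count) != 1)
                cpu_relax();

        check_tsc_warp();               /* both CPUs now race through the warp test */

        atomic_inc(&stop_count);        /* release the new CPU */
}

void check_tsc_sync_target(void)
{
        atomic_inc(&start_count);       /* tell the boot CPU we are here */

        check_tsc_warp();

        /* wait for the boot CPU to finish before going online */
        while (atomic_read(&stop_count) != 1)
                cpu_relax();
}

In a real implementation the counters would have to be reset after each hotplug operation so the next CPU can be checked the same way.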