| author | Ingo Molnar <mingo@elte.hu> | 2007-02-16 04:27:34 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-02-16 11:13:57 -0500 |
| commit | 95492e4646e5de8b43d9a7908d6177fb737b61f0 (patch) | |
| tree | ae25cd206ca76f78d50ac2a206ef012e0ab1d9df | |
| parent | 92c7e00254b2d0efc1e36ac3e45474ce1871b6b2 (diff) | |
[PATCH] x86: rewrite SMP TSC sync code
Make the TSC synchronization code more robust, and unify it between x86_64 and
i386.

The biggest change is the removal of the 'fix up TSCs' code on x86_64 and
i386; in some rare cases it was /causing/ time-warps on SMP systems.

The new code only checks for TSC asynchronicity - and if it can prove a
time-warp (i.e. if it can observe the TSC going backwards when going from one
CPU to another within a critical section), then the TSC clock-source is
turned off.

The TSC synchronization-checking code also got moved into a separate file.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
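The heart of the new approach is the point-to-point warp check in tsc_sync.c (the full version is in the hunk below). The following is a condensed, non-verbatim sketch of one iteration of that check; the helper name warp_check_step() is made up for illustration, and the loop driver, the 100-million-iteration safety exit and the touch_nmi_watchdog() call are omitted:

```c
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <asm/tsc.h>

/*
 * Both CPUs hammer the TSC under a shared raw spinlock; if a reading is
 * ever smaller than the previous reading (which may have been taken on
 * the other CPU), a time-warp has been proven.
 */
static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED;
static __cpuinitdata cycles_t last_tsc;
static __cpuinitdata cycles_t max_warp;
static __cpuinitdata int nr_warps;

/* one iteration of the warp check - the name is illustrative only */
static __cpuinit void warp_check_step(void)
{
	cycles_t prev, now;

	/*
	 * Take the global lock, read the TSC, and remember the previous
	 * reading (possibly taken on the other CPU):
	 */
	__raw_spin_lock(&sync_lock);
	prev = last_tsc;
	now = get_cycles_sync();
	last_tsc = now;
	__raw_spin_unlock(&sync_lock);

	/*
	 * Outside the critical section, check whether time went backwards:
	 */
	if (unlikely(prev > now)) {
		__raw_spin_lock(&sync_lock);
		max_warp = max(max_warp, prev - now);
		nr_warps++;
		__raw_spin_unlock(&sync_lock);
	}
}
```

If any warp is observed, check_tsc_sync_source() prints the measured maximum warp, calls mark_tsc_unstable(), and the TSC clock-source is not used.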
| -rw-r--r-- | arch/i386/kernel/Makefile     |   2 |
| -rw-r--r-- | arch/i386/kernel/smpboot.c    | 178 |
| -rw-r--r-- | arch/i386/kernel/tsc.c        |   4 |
| -rw-r--r-- | arch/i386/kernel/tsc_sync.c   |   1 |
| -rw-r--r-- | arch/x86_64/kernel/Makefile   |   2 |
| -rw-r--r-- | arch/x86_64/kernel/smpboot.c  | 230 |
| -rw-r--r-- | arch/x86_64/kernel/time.c     |  11 |
| -rw-r--r-- | arch/x86_64/kernel/tsc_sync.c | 187 |
| -rw-r--r-- | include/asm-i386/tsc.h        |  49 |
| -rw-r--r-- | include/asm-x86_64/proto.h    |   2 |
| -rw-r--r-- | include/asm-x86_64/timex.h    |  26 |
| -rw-r--r-- | include/asm-x86_64/tsc.h      |  66 |

12 files changed, 295 insertions, 463 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index cbe4e601885c..c2b3b79dc436 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -18,7 +18,7 @@ obj-$(CONFIG_X86_MSR) += msr.o
| 18 | obj-$(CONFIG_X86_CPUID) += cpuid.o | 18 | obj-$(CONFIG_X86_CPUID) += cpuid.o |
| 19 | obj-$(CONFIG_MICROCODE) += microcode.o | 19 | obj-$(CONFIG_MICROCODE) += microcode.o |
| 20 | obj-$(CONFIG_APM) += apm.o | 20 | obj-$(CONFIG_APM) += apm.o |
| 21 | obj-$(CONFIG_X86_SMP) += smp.o smpboot.o | 21 | obj-$(CONFIG_X86_SMP) += smp.o smpboot.o tsc_sync.o |
| 22 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o | 22 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o |
| 23 | obj-$(CONFIG_X86_MPPARSE) += mpparse.o | 23 | obj-$(CONFIG_X86_MPPARSE) += mpparse.o |
| 24 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o | 24 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o |
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index f46a4d095e6c..6ddffe8aabb2 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -94,12 +94,6 @@ cpumask_t cpu_possible_map;
| 94 | EXPORT_SYMBOL(cpu_possible_map); | 94 | EXPORT_SYMBOL(cpu_possible_map); |
| 95 | static cpumask_t smp_commenced_mask; | 95 | static cpumask_t smp_commenced_mask; |
| 96 | 96 | ||
| 97 | /* TSC's upper 32 bits can't be written in eariler CPU (before prescott), there | ||
| 98 | * is no way to resync one AP against BP. TBD: for prescott and above, we | ||
| 99 | * should use IA64's algorithm | ||
| 100 | */ | ||
| 101 | static int __devinitdata tsc_sync_disabled; | ||
| 102 | |||
| 103 | /* Per CPU bogomips and other parameters */ | 97 | /* Per CPU bogomips and other parameters */ |
| 104 | struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; | 98 | struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; |
| 105 | EXPORT_SYMBOL(cpu_data); | 99 | EXPORT_SYMBOL(cpu_data); |
@@ -216,151 +210,6 @@ valid_k7:
| 216 | ; | 210 | ; |
| 217 | } | 211 | } |
| 218 | 212 | ||
| 219 | /* | ||
| 220 | * TSC synchronization. | ||
| 221 | * | ||
| 222 | * We first check whether all CPUs have their TSC's synchronized, | ||
| 223 | * then we print a warning if not, and always resync. | ||
| 224 | */ | ||
| 225 | |||
| 226 | static struct { | ||
| 227 | atomic_t start_flag; | ||
| 228 | atomic_t count_start; | ||
| 229 | atomic_t count_stop; | ||
| 230 | unsigned long long values[NR_CPUS]; | ||
| 231 | } tsc __cpuinitdata = { | ||
| 232 | .start_flag = ATOMIC_INIT(0), | ||
| 233 | .count_start = ATOMIC_INIT(0), | ||
| 234 | .count_stop = ATOMIC_INIT(0), | ||
| 235 | }; | ||
| 236 | |||
| 237 | #define NR_LOOPS 5 | ||
| 238 | |||
| 239 | static void __init synchronize_tsc_bp(void) | ||
| 240 | { | ||
| 241 | int i; | ||
| 242 | unsigned long long t0; | ||
| 243 | unsigned long long sum, avg; | ||
| 244 | long long delta; | ||
| 245 | unsigned int one_usec; | ||
| 246 | int buggy = 0; | ||
| 247 | |||
| 248 | printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus()); | ||
| 249 | |||
| 250 | /* convert from kcyc/sec to cyc/usec */ | ||
| 251 | one_usec = cpu_khz / 1000; | ||
| 252 | |||
| 253 | atomic_set(&tsc.start_flag, 1); | ||
| 254 | wmb(); | ||
| 255 | |||
| 256 | /* | ||
| 257 | * We loop a few times to get a primed instruction cache, | ||
| 258 | * then the last pass is more or less synchronized and | ||
| 259 | * the BP and APs set their cycle counters to zero all at | ||
| 260 | * once. This reduces the chance of having random offsets | ||
| 261 | * between the processors, and guarantees that the maximum | ||
| 262 | * delay between the cycle counters is never bigger than | ||
| 263 | * the latency of information-passing (cachelines) between | ||
| 264 | * two CPUs. | ||
| 265 | */ | ||
| 266 | for (i = 0; i < NR_LOOPS; i++) { | ||
| 267 | /* | ||
| 268 | * all APs synchronize but they loop on '== num_cpus' | ||
| 269 | */ | ||
| 270 | while (atomic_read(&tsc.count_start) != num_booting_cpus()-1) | ||
| 271 | cpu_relax(); | ||
| 272 | atomic_set(&tsc.count_stop, 0); | ||
| 273 | wmb(); | ||
| 274 | /* | ||
| 275 | * this lets the APs save their current TSC: | ||
| 276 | */ | ||
| 277 | atomic_inc(&tsc.count_start); | ||
| 278 | |||
| 279 | rdtscll(tsc.values[smp_processor_id()]); | ||
| 280 | /* | ||
| 281 | * We clear the TSC in the last loop: | ||
| 282 | */ | ||
| 283 | if (i == NR_LOOPS-1) | ||
| 284 | write_tsc(0, 0); | ||
| 285 | |||
| 286 | /* | ||
| 287 | * Wait for all APs to leave the synchronization point: | ||
| 288 | */ | ||
| 289 | while (atomic_read(&tsc.count_stop) != num_booting_cpus()-1) | ||
| 290 | cpu_relax(); | ||
| 291 | atomic_set(&tsc.count_start, 0); | ||
| 292 | wmb(); | ||
| 293 | atomic_inc(&tsc.count_stop); | ||
| 294 | } | ||
| 295 | |||
| 296 | sum = 0; | ||
| 297 | for (i = 0; i < NR_CPUS; i++) { | ||
| 298 | if (cpu_isset(i, cpu_callout_map)) { | ||
| 299 | t0 = tsc.values[i]; | ||
| 300 | sum += t0; | ||
| 301 | } | ||
| 302 | } | ||
| 303 | avg = sum; | ||
| 304 | do_div(avg, num_booting_cpus()); | ||
| 305 | |||
| 306 | for (i = 0; i < NR_CPUS; i++) { | ||
| 307 | if (!cpu_isset(i, cpu_callout_map)) | ||
| 308 | continue; | ||
| 309 | delta = tsc.values[i] - avg; | ||
| 310 | if (delta < 0) | ||
| 311 | delta = -delta; | ||
| 312 | /* | ||
| 313 | * We report bigger than 2 microseconds clock differences. | ||
| 314 | */ | ||
| 315 | if (delta > 2*one_usec) { | ||
| 316 | long long realdelta; | ||
| 317 | |||
| 318 | if (!buggy) { | ||
| 319 | buggy = 1; | ||
| 320 | printk("\n"); | ||
| 321 | } | ||
| 322 | realdelta = delta; | ||
| 323 | do_div(realdelta, one_usec); | ||
| 324 | if (tsc.values[i] < avg) | ||
| 325 | realdelta = -realdelta; | ||
| 326 | |||
| 327 | if (realdelta) | ||
| 328 | printk(KERN_INFO "CPU#%d had %Ld usecs TSC " | ||
| 329 | "skew, fixed it up.\n", i, realdelta); | ||
| 330 | } | ||
| 331 | } | ||
| 332 | if (!buggy) | ||
| 333 | printk("passed.\n"); | ||
| 334 | } | ||
| 335 | |||
| 336 | static void __cpuinit synchronize_tsc_ap(void) | ||
| 337 | { | ||
| 338 | int i; | ||
| 339 | |||
| 340 | /* | ||
| 341 | * Not every cpu is online at the time | ||
| 342 | * this gets called, so we first wait for the BP to | ||
| 343 | * finish SMP initialization: | ||
| 344 | */ | ||
| 345 | while (!atomic_read(&tsc.start_flag)) | ||
| 346 | cpu_relax(); | ||
| 347 | |||
| 348 | for (i = 0; i < NR_LOOPS; i++) { | ||
| 349 | atomic_inc(&tsc.count_start); | ||
| 350 | while (atomic_read(&tsc.count_start) != num_booting_cpus()) | ||
| 351 | cpu_relax(); | ||
| 352 | |||
| 353 | rdtscll(tsc.values[smp_processor_id()]); | ||
| 354 | if (i == NR_LOOPS-1) | ||
| 355 | write_tsc(0, 0); | ||
| 356 | |||
| 357 | atomic_inc(&tsc.count_stop); | ||
| 358 | while (atomic_read(&tsc.count_stop) != num_booting_cpus()) | ||
| 359 | cpu_relax(); | ||
| 360 | } | ||
| 361 | } | ||
| 362 | #undef NR_LOOPS | ||
| 363 | |||
| 364 | extern void calibrate_delay(void); | 213 | extern void calibrate_delay(void); |
| 365 | 214 | ||
| 366 | static atomic_t init_deasserted; | 215 | static atomic_t init_deasserted; |
@@ -446,12 +295,6 @@ static void __cpuinit smp_callin(void)
| 446 | * Allow the master to continue. | 295 | * Allow the master to continue. |
| 447 | */ | 296 | */ |
| 448 | cpu_set(cpuid, cpu_callin_map); | 297 | cpu_set(cpuid, cpu_callin_map); |
| 449 | |||
| 450 | /* | ||
| 451 | * Synchronize the TSC with the BP | ||
| 452 | */ | ||
| 453 | if (cpu_has_tsc && cpu_khz && !tsc_sync_disabled) | ||
| 454 | synchronize_tsc_ap(); | ||
| 455 | } | 298 | } |
| 456 | 299 | ||
| 457 | static int cpucount; | 300 | static int cpucount; |
@@ -554,6 +397,11 @@ static void __cpuinit start_secondary(void *unused)
| 554 | smp_callin(); | 397 | smp_callin(); |
| 555 | while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) | 398 | while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) |
| 556 | rep_nop(); | 399 | rep_nop(); |
| 400 | /* | ||
| 401 | * Check TSC synchronization with the BP: | ||
| 402 | */ | ||
| 403 | check_tsc_sync_target(); | ||
| 404 | |||
| 557 | setup_secondary_clock(); | 405 | setup_secondary_clock(); |
| 558 | if (nmi_watchdog == NMI_IO_APIC) { | 406 | if (nmi_watchdog == NMI_IO_APIC) { |
| 559 | disable_8259A_irq(0); | 407 | disable_8259A_irq(0); |
@@ -1125,8 +973,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
| 1125 | info.cpu = cpu; | 973 | info.cpu = cpu; |
| 1126 | INIT_WORK(&info.task, do_warm_boot_cpu); | 974 | INIT_WORK(&info.task, do_warm_boot_cpu); |
| 1127 | 975 | ||
| 1128 | tsc_sync_disabled = 1; | ||
| 1129 | |||
| 1130 | /* init low mem mapping */ | 976 | /* init low mem mapping */ |
| 1131 | clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, | 977 | clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, |
| 1132 | min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); | 978 | min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); |
@@ -1134,7 +980,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
| 1134 | schedule_work(&info.task); | 980 | schedule_work(&info.task); |
| 1135 | wait_for_completion(&done); | 981 | wait_for_completion(&done); |
| 1136 | 982 | ||
| 1137 | tsc_sync_disabled = 0; | ||
| 1138 | zap_low_mappings(); | 983 | zap_low_mappings(); |
| 1139 | ret = 0; | 984 | ret = 0; |
| 1140 | exit: | 985 | exit: |
@@ -1331,12 +1176,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
| 1331 | smpboot_setup_io_apic(); | 1176 | smpboot_setup_io_apic(); |
| 1332 | 1177 | ||
| 1333 | setup_boot_clock(); | 1178 | setup_boot_clock(); |
| 1334 | |||
| 1335 | /* | ||
| 1336 | * Synchronize the TSC with the AP | ||
| 1337 | */ | ||
| 1338 | if (cpu_has_tsc && cpucount && cpu_khz) | ||
| 1339 | synchronize_tsc_bp(); | ||
| 1340 | } | 1179 | } |
| 1341 | 1180 | ||
| 1342 | /* These are wrappers to interface to the new boot process. Someone | 1181 | /* These are wrappers to interface to the new boot process. Someone |
@@ -1471,9 +1310,16 @@ int __cpuinit __cpu_up(unsigned int cpu)
| 1471 | } | 1310 | } |
| 1472 | 1311 | ||
| 1473 | local_irq_enable(); | 1312 | local_irq_enable(); |
| 1313 | |||
| 1474 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; | 1314 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; |
| 1475 | /* Unleash the CPU! */ | 1315 | /* Unleash the CPU! */ |
| 1476 | cpu_set(cpu, smp_commenced_mask); | 1316 | cpu_set(cpu, smp_commenced_mask); |
| 1317 | |||
| 1318 | /* | ||
| 1319 | * Check TSC synchronization with the AP: | ||
| 1320 | */ | ||
| 1321 | check_tsc_sync_source(cpu); | ||
| 1322 | |||
| 1477 | while (!cpu_isset(cpu, cpu_online_map)) | 1323 | while (!cpu_isset(cpu, cpu_online_map)) |
| 1478 | cpu_relax(); | 1324 | cpu_relax(); |
| 1479 | 1325 | ||
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index 6f6971da761c..0fd93107ff9a 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -406,8 +406,10 @@ out:
| 406 | * Make an educated guess if the TSC is trustworthy and synchronized | 406 | * Make an educated guess if the TSC is trustworthy and synchronized |
| 407 | * over all CPUs. | 407 | * over all CPUs. |
| 408 | */ | 408 | */ |
| 409 | static __init int unsynchronized_tsc(void) | 409 | __cpuinit int unsynchronized_tsc(void) |
| 410 | { | 410 | { |
| 411 | if (!cpu_has_tsc || tsc_unstable) | ||
| 412 | return 1; | ||
| 411 | /* | 413 | /* |
| 412 | * Intel systems are normally all synchronized. | 414 | * Intel systems are normally all synchronized. |
| 413 | * Exceptions must mark TSC as unstable: | 415 | * Exceptions must mark TSC as unstable: |
diff --git a/arch/i386/kernel/tsc_sync.c b/arch/i386/kernel/tsc_sync.c
new file mode 100644
index 000000000000..12424629af87
--- /dev/null
+++ b/arch/i386/kernel/tsc_sync.c
@@ -0,0 +1 @@
#include "../../x86_64/kernel/tsc_sync.c"
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index ae399458024b..6465eee6d920 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -19,7 +19,7 @@ obj-$(CONFIG_ACPI) += acpi/
| 19 | obj-$(CONFIG_X86_MSR) += msr.o | 19 | obj-$(CONFIG_X86_MSR) += msr.o |
| 20 | obj-$(CONFIG_MICROCODE) += microcode.o | 20 | obj-$(CONFIG_MICROCODE) += microcode.o |
| 21 | obj-$(CONFIG_X86_CPUID) += cpuid.o | 21 | obj-$(CONFIG_X86_CPUID) += cpuid.o |
| 22 | obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o | 22 | obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o tsc_sync.o |
| 23 | obj-y += apic.o nmi.o | 23 | obj-y += apic.o nmi.o |
| 24 | obj-y += io_apic.o mpparse.o \ | 24 | obj-y += io_apic.o mpparse.o \ |
| 25 | genapic.o genapic_cluster.o genapic_flat.o | 25 | genapic.o genapic_cluster.o genapic_flat.o |
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index daf19332f0dd..62d828433c30 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -148,217 +148,6 @@ static void __cpuinit smp_store_cpu_info(int id)
| 148 | print_cpu_info(c); | 148 | print_cpu_info(c); |
| 149 | } | 149 | } |
| 150 | 150 | ||
| 151 | /* | ||
| 152 | * New Funky TSC sync algorithm borrowed from IA64. | ||
| 153 | * Main advantage is that it doesn't reset the TSCs fully and | ||
| 154 | * in general looks more robust and it works better than my earlier | ||
| 155 | * attempts. I believe it was written by David Mosberger. Some minor | ||
| 156 | * adjustments for x86-64 by me -AK | ||
| 157 | * | ||
| 158 | * Original comment reproduced below. | ||
| 159 | * | ||
| 160 | * Synchronize TSC of the current (slave) CPU with the TSC of the | ||
| 161 | * MASTER CPU (normally the time-keeper CPU). We use a closed loop to | ||
| 162 | * eliminate the possibility of unaccounted-for errors (such as | ||
| 163 | * getting a machine check in the middle of a calibration step). The | ||
| 164 | * basic idea is for the slave to ask the master what itc value it has | ||
| 165 | * and to read its own itc before and after the master responds. Each | ||
| 166 | * iteration gives us three timestamps: | ||
| 167 | * | ||
| 168 | * slave master | ||
| 169 | * | ||
| 170 | * t0 ---\ | ||
| 171 | * ---\ | ||
| 172 | * ---> | ||
| 173 | * tm | ||
| 174 | * /--- | ||
| 175 | * /--- | ||
| 176 | * t1 <--- | ||
| 177 | * | ||
| 178 | * | ||
| 179 | * The goal is to adjust the slave's TSC such that tm falls exactly | ||
| 180 | * half-way between t0 and t1. If we achieve this, the clocks are | ||
| 181 | * synchronized provided the interconnect between the slave and the | ||
| 182 | * master is symmetric. Even if the interconnect were asymmetric, we | ||
| 183 | * would still know that the synchronization error is smaller than the | ||
| 184 | * roundtrip latency (t0 - t1). | ||
| 185 | * | ||
| 186 | * When the interconnect is quiet and symmetric, this lets us | ||
| 187 | * synchronize the TSC to within one or two cycles. However, we can | ||
| 188 | * only *guarantee* that the synchronization is accurate to within a | ||
| 189 | * round-trip time, which is typically in the range of several hundred | ||
| 190 | * cycles (e.g., ~500 cycles). In practice, this means that the TSCs | ||
| 191 | * are usually almost perfectly synchronized, but we shouldn't assume | ||
| 192 | * that the accuracy is much better than half a micro second or so. | ||
| 193 | * | ||
| 194 | * [there are other errors like the latency of RDTSC and of the | ||
| 195 | * WRMSR. These can also account to hundreds of cycles. So it's | ||
| 196 | * probably worse. It claims 153 cycles error on a dual Opteron, | ||
| 197 | * but I suspect the numbers are actually somewhat worse -AK] | ||
| 198 | */ | ||
| 199 | |||
| 200 | #define MASTER 0 | ||
| 201 | #define SLAVE (SMP_CACHE_BYTES/8) | ||
| 202 | |||
| 203 | /* Intentionally don't use cpu_relax() while TSC synchronization | ||
| 204 | because we don't want to go into funky power save modi or cause | ||
| 205 | hypervisors to schedule us away. Going to sleep would likely affect | ||
| 206 | latency and low latency is the primary objective here. -AK */ | ||
| 207 | #define no_cpu_relax() barrier() | ||
| 208 | |||
| 209 | static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock); | ||
| 210 | static volatile __cpuinitdata unsigned long go[SLAVE + 1]; | ||
| 211 | static int notscsync __cpuinitdata; | ||
| 212 | |||
| 213 | #undef DEBUG_TSC_SYNC | ||
| 214 | |||
| 215 | #define NUM_ROUNDS 64 /* magic value */ | ||
| 216 | #define NUM_ITERS 5 /* likewise */ | ||
| 217 | |||
| 218 | /* Callback on boot CPU */ | ||
| 219 | static __cpuinit void sync_master(void *arg) | ||
| 220 | { | ||
| 221 | unsigned long flags, i; | ||
| 222 | |||
| 223 | go[MASTER] = 0; | ||
| 224 | |||
| 225 | local_irq_save(flags); | ||
| 226 | { | ||
| 227 | for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) { | ||
| 228 | while (!go[MASTER]) | ||
| 229 | no_cpu_relax(); | ||
| 230 | go[MASTER] = 0; | ||
| 231 | rdtscll(go[SLAVE]); | ||
| 232 | } | ||
| 233 | } | ||
| 234 | local_irq_restore(flags); | ||
| 235 | } | ||
| 236 | |||
| 237 | /* | ||
| 238 | * Return the number of cycles by which our tsc differs from the tsc | ||
| 239 | * on the master (time-keeper) CPU. A positive number indicates our | ||
| 240 | * tsc is ahead of the master, negative that it is behind. | ||
| 241 | */ | ||
| 242 | static inline long | ||
| 243 | get_delta(long *rt, long *master) | ||
| 244 | { | ||
| 245 | unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0; | ||
| 246 | unsigned long tcenter, t0, t1, tm; | ||
| 247 | int i; | ||
| 248 | |||
| 249 | for (i = 0; i < NUM_ITERS; ++i) { | ||
| 250 | rdtscll(t0); | ||
| 251 | go[MASTER] = 1; | ||
| 252 | while (!(tm = go[SLAVE])) | ||
| 253 | no_cpu_relax(); | ||
| 254 | go[SLAVE] = 0; | ||
| 255 | rdtscll(t1); | ||
| 256 | |||
| 257 | if (t1 - t0 < best_t1 - best_t0) | ||
| 258 | best_t0 = t0, best_t1 = t1, best_tm = tm; | ||
| 259 | } | ||
| 260 | |||
| 261 | *rt = best_t1 - best_t0; | ||
| 262 | *master = best_tm - best_t0; | ||
| 263 | |||
| 264 | /* average best_t0 and best_t1 without overflow: */ | ||
| 265 | tcenter = (best_t0/2 + best_t1/2); | ||
| 266 | if (best_t0 % 2 + best_t1 % 2 == 2) | ||
| 267 | ++tcenter; | ||
| 268 | return tcenter - best_tm; | ||
| 269 | } | ||
| 270 | |||
| 271 | static __cpuinit void sync_tsc(unsigned int master) | ||
| 272 | { | ||
| 273 | int i, done = 0; | ||
| 274 | long delta, adj, adjust_latency = 0; | ||
| 275 | unsigned long flags, rt, master_time_stamp, bound; | ||
| 276 | #ifdef DEBUG_TSC_SYNC | ||
| 277 | static struct syncdebug { | ||
| 278 | long rt; /* roundtrip time */ | ||
| 279 | long master; /* master's timestamp */ | ||
| 280 | long diff; /* difference between midpoint and master's timestamp */ | ||
| 281 | long lat; /* estimate of tsc adjustment latency */ | ||
| 282 | } t[NUM_ROUNDS] __cpuinitdata; | ||
| 283 | #endif | ||
| 284 | |||
| 285 | printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", | ||
| 286 | smp_processor_id(), master); | ||
| 287 | |||
| 288 | go[MASTER] = 1; | ||
| 289 | |||
| 290 | /* It is dangerous to broadcast IPI as cpus are coming up, | ||
| 291 | * as they may not be ready to accept them. So since | ||
| 292 | * we only need to send the ipi to the boot cpu direct | ||
| 293 | * the message, and avoid the race. | ||
| 294 | */ | ||
| 295 | smp_call_function_single(master, sync_master, NULL, 1, 0); | ||
| 296 | |||
| 297 | while (go[MASTER]) /* wait for master to be ready */ | ||
| 298 | no_cpu_relax(); | ||
| 299 | |||
| 300 | spin_lock_irqsave(&tsc_sync_lock, flags); | ||
| 301 | { | ||
| 302 | for (i = 0; i < NUM_ROUNDS; ++i) { | ||
| 303 | delta = get_delta(&rt, &master_time_stamp); | ||
| 304 | if (delta == 0) { | ||
| 305 | done = 1; /* let's lock on to this... */ | ||
| 306 | bound = rt; | ||
| 307 | } | ||
| 308 | |||
| 309 | if (!done) { | ||
| 310 | unsigned long t; | ||
| 311 | if (i > 0) { | ||
| 312 | adjust_latency += -delta; | ||
| 313 | adj = -delta + adjust_latency/4; | ||
| 314 | } else | ||
| 315 | adj = -delta; | ||
| 316 | |||
| 317 | rdtscll(t); | ||
| 318 | wrmsrl(MSR_IA32_TSC, t + adj); | ||
| 319 | } | ||
| 320 | #ifdef DEBUG_TSC_SYNC | ||
| 321 | t[i].rt = rt; | ||
| 322 | t[i].master = master_time_stamp; | ||
| 323 | t[i].diff = delta; | ||
| 324 | t[i].lat = adjust_latency/4; | ||
| 325 | #endif | ||
| 326 | } | ||
| 327 | } | ||
| 328 | spin_unlock_irqrestore(&tsc_sync_lock, flags); | ||
| 329 | |||
| 330 | #ifdef DEBUG_TSC_SYNC | ||
| 331 | for (i = 0; i < NUM_ROUNDS; ++i) | ||
| 332 | printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n", | ||
| 333 | t[i].rt, t[i].master, t[i].diff, t[i].lat); | ||
| 334 | #endif | ||
| 335 | |||
| 336 | printk(KERN_INFO | ||
| 337 | "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, " | ||
| 338 | "maxerr %lu cycles)\n", | ||
| 339 | smp_processor_id(), master, delta, rt); | ||
| 340 | } | ||
| 341 | |||
| 342 | static void __cpuinit tsc_sync_wait(void) | ||
| 343 | { | ||
| 344 | /* | ||
| 345 | * When the CPU has synchronized TSCs assume the BIOS | ||
| 346 | * or the hardware already synced. Otherwise we could | ||
| 347 | * mess up a possible perfect synchronization with a | ||
| 348 | * not-quite-perfect algorithm. | ||
| 349 | */ | ||
| 350 | if (notscsync || !cpu_has_tsc || !unsynchronized_tsc()) | ||
| 351 | return; | ||
| 352 | sync_tsc(0); | ||
| 353 | } | ||
| 354 | |||
| 355 | static __init int notscsync_setup(char *s) | ||
| 356 | { | ||
| 357 | notscsync = 1; | ||
| 358 | return 1; | ||
| 359 | } | ||
| 360 | __setup("notscsync", notscsync_setup); | ||
| 361 | |||
| 362 | static atomic_t init_deasserted __cpuinitdata; | 151 | static atomic_t init_deasserted __cpuinitdata; |
| 363 | 152 | ||
| 364 | /* | 153 | /* |
@@ -546,6 +335,11 @@ void __cpuinit start_secondary(void)
| 546 | /* otherwise gcc will move up the smp_processor_id before the cpu_init */ | 335 | /* otherwise gcc will move up the smp_processor_id before the cpu_init */ |
| 547 | barrier(); | 336 | barrier(); |
| 548 | 337 | ||
| 338 | /* | ||
| 339 | * Check TSC sync first: | ||
| 340 | */ | ||
| 341 | check_tsc_sync_target(); | ||
| 342 | |||
| 549 | Dprintk("cpu %d: setting up apic clock\n", smp_processor_id()); | 343 | Dprintk("cpu %d: setting up apic clock\n", smp_processor_id()); |
| 550 | setup_secondary_APIC_clock(); | 344 | setup_secondary_APIC_clock(); |
| 551 | 345 | ||
@@ -565,14 +359,6 @@ void __cpuinit start_secondary(void)
| 565 | */ | 359 | */ |
| 566 | set_cpu_sibling_map(smp_processor_id()); | 360 | set_cpu_sibling_map(smp_processor_id()); |
| 567 | 361 | ||
| 568 | /* | ||
| 569 | * Wait for TSC sync to not schedule things before. | ||
| 570 | * We still process interrupts, which could see an inconsistent | ||
| 571 | * time in that window unfortunately. | ||
| 572 | * Do this here because TSC sync has global unprotected state. | ||
| 573 | */ | ||
| 574 | tsc_sync_wait(); | ||
| 575 | |||
| 576 | /* | 362 | /* |
| 577 | * We need to hold call_lock, so there is no inconsistency | 363 | * We need to hold call_lock, so there is no inconsistency |
| 578 | * between the time smp_call_function() determines number of | 364 | * between the time smp_call_function() determines number of |
@@ -592,6 +378,7 @@ void __cpuinit start_secondary(void)
| 592 | cpu_set(smp_processor_id(), cpu_online_map); | 378 | cpu_set(smp_processor_id(), cpu_online_map); |
| 593 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; | 379 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; |
| 594 | spin_unlock(&vector_lock); | 380 | spin_unlock(&vector_lock); |
| 381 | |||
| 595 | unlock_ipi_call_lock(); | 382 | unlock_ipi_call_lock(); |
| 596 | 383 | ||
| 597 | cpu_idle(); | 384 | cpu_idle(); |
@@ -1168,6 +955,11 @@ int __cpuinit __cpu_up(unsigned int cpu)
| 1168 | /* Unleash the CPU! */ | 955 | /* Unleash the CPU! */ |
| 1169 | Dprintk("waiting for cpu %d\n", cpu); | 956 | Dprintk("waiting for cpu %d\n", cpu); |
| 1170 | 957 | ||
| 958 | /* | ||
| 959 | * Make sure and check TSC sync: | ||
| 960 | */ | ||
| 961 | check_tsc_sync_source(cpu); | ||
| 962 | |||
| 1171 | while (!cpu_isset(cpu, cpu_online_map)) | 963 | while (!cpu_isset(cpu, cpu_online_map)) |
| 1172 | cpu_relax(); | 964 | cpu_relax(); |
| 1173 | 965 | ||
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 3cc6886f1fb7..8cb2b2d35f5d 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -944,12 +944,23 @@ void __init time_init(void)
| 944 | #endif | 944 | #endif |
| 945 | } | 945 | } |
| 946 | 946 | ||
| 947 | static int tsc_unstable = 0; | ||
| 948 | |||
| 949 | void mark_tsc_unstable(void) | ||
| 950 | { | ||
| 951 | tsc_unstable = 1; | ||
| 952 | } | ||
| 953 | EXPORT_SYMBOL_GPL(mark_tsc_unstable); | ||
| 954 | |||
| 947 | /* | 955 | /* |
| 948 | * Make an educated guess if the TSC is trustworthy and synchronized | 956 | * Make an educated guess if the TSC is trustworthy and synchronized |
| 949 | * over all CPUs. | 957 | * over all CPUs. |
| 950 | */ | 958 | */ |
| 951 | __cpuinit int unsynchronized_tsc(void) | 959 | __cpuinit int unsynchronized_tsc(void) |
| 952 | { | 960 | { |
| 961 | if (tsc_unstable) | ||
| 962 | return 1; | ||
| 963 | |||
| 953 | #ifdef CONFIG_SMP | 964 | #ifdef CONFIG_SMP |
| 954 | if (apic_is_clustered_box()) | 965 | if (apic_is_clustered_box()) |
| 955 | return 1; | 966 | return 1; |
diff --git a/arch/x86_64/kernel/tsc_sync.c b/arch/x86_64/kernel/tsc_sync.c
new file mode 100644
index 000000000000..014f0db45dfa
--- /dev/null
+++ b/arch/x86_64/kernel/tsc_sync.c
@@ -0,0 +1,187 @@
| 1 | /* | ||
| 2 | * arch/x86_64/kernel/tsc_sync.c: check TSC synchronization. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006, Red Hat, Inc., Ingo Molnar | ||
| 5 | * | ||
| 6 | * We check whether all boot CPUs have their TSC's synchronized, | ||
| 7 | * print a warning if not and turn off the TSC clock-source. | ||
| 8 | * | ||
| 9 | * The warp-check is point-to-point between two CPUs, the CPU | ||
| 10 | * initiating the bootup is the 'source CPU', the freshly booting | ||
| 11 | * CPU is the 'target CPU'. | ||
| 12 | * | ||
| 13 | * Only two CPUs may participate - they can enter in any order. | ||
| 14 | * ( The serial nature of the boot logic and the CPU hotplug lock | ||
| 15 | * protects against more than 2 CPUs entering this code. ) | ||
| 16 | */ | ||
| 17 | #include <linux/spinlock.h> | ||
| 18 | #include <linux/kernel.h> | ||
| 19 | #include <linux/init.h> | ||
| 20 | #include <linux/smp.h> | ||
| 21 | #include <linux/nmi.h> | ||
| 22 | #include <asm/tsc.h> | ||
| 23 | |||
| 24 | /* | ||
| 25 | * Entry/exit counters that make sure that both CPUs | ||
| 26 | * run the measurement code at once: | ||
| 27 | */ | ||
| 28 | static __cpuinitdata atomic_t start_count; | ||
| 29 | static __cpuinitdata atomic_t stop_count; | ||
| 30 | |||
| 31 | /* | ||
| 32 | * We use a raw spinlock in this exceptional case, because | ||
| 33 | * we want to have the fastest, inlined, non-debug version | ||
| 34 | * of a critical section, to be able to prove TSC time-warps: | ||
| 35 | */ | ||
| 36 | static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
| 37 | static __cpuinitdata cycles_t last_tsc; | ||
| 38 | static __cpuinitdata cycles_t max_warp; | ||
| 39 | static __cpuinitdata int nr_warps; | ||
| 40 | |||
| 41 | /* | ||
| 42 | * TSC-warp measurement loop running on both CPUs: | ||
| 43 | */ | ||
| 44 | static __cpuinit void check_tsc_warp(void) | ||
| 45 | { | ||
| 46 | cycles_t start, now, prev, end; | ||
| 47 | int i; | ||
| 48 | |||
| 49 | start = get_cycles_sync(); | ||
| 50 | /* | ||
| 51 | * The measurement runs for 20 msecs: | ||
| 52 | */ | ||
| 53 | end = start + cpu_khz * 20ULL; | ||
| 54 | now = start; | ||
| 55 | |||
| 56 | for (i = 0; ; i++) { | ||
| 57 | /* | ||
| 58 | * We take the global lock, measure TSC, save the | ||
| 59 | * previous TSC that was measured (possibly on | ||
| 60 | * another CPU) and update the previous TSC timestamp. | ||
| 61 | */ | ||
| 62 | __raw_spin_lock(&sync_lock); | ||
| 63 | prev = last_tsc; | ||
| 64 | now = get_cycles_sync(); | ||
| 65 | last_tsc = now; | ||
| 66 | __raw_spin_unlock(&sync_lock); | ||
| 67 | |||
| 68 | /* | ||
| 69 | * Be nice every now and then (and also check whether | ||
| 70 | * measurement is done [we also insert a 100 million | ||
| 71 | * loops safety exit, so we dont lock up in case the | ||
| 72 | * TSC readout is totally broken]): | ||
| 73 | */ | ||
| 74 | if (unlikely(!(i & 7))) { | ||
| 75 | if (now > end || i > 100000000) | ||
| 76 | break; | ||
| 77 | cpu_relax(); | ||
| 78 | touch_nmi_watchdog(); | ||
| 79 | } | ||
| 80 | /* | ||
| 81 | * Outside the critical section we can now see whether | ||
| 82 | * we saw a time-warp of the TSC going backwards: | ||
| 83 | */ | ||
| 84 | if (unlikely(prev > now)) { | ||
| 85 | __raw_spin_lock(&sync_lock); | ||
| 86 | max_warp = max(max_warp, prev - now); | ||
| 87 | nr_warps++; | ||
| 88 | __raw_spin_unlock(&sync_lock); | ||
| 89 | } | ||
| 90 | |||
| 91 | } | ||
| 92 | } | ||
| 93 | |||
| 94 | /* | ||
| 95 | * Source CPU calls into this - it waits for the freshly booted | ||
| 96 | * target CPU to arrive and then starts the measurement: | ||
| 97 | */ | ||
| 98 | void __cpuinit check_tsc_sync_source(int cpu) | ||
| 99 | { | ||
| 100 | int cpus = 2; | ||
| 101 | |||
| 102 | /* | ||
| 103 | * No need to check if we already know that the TSC is not | ||
| 104 | * synchronized: | ||
| 105 | */ | ||
| 106 | if (unsynchronized_tsc()) | ||
| 107 | return; | ||
| 108 | |||
| 109 | printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:", | ||
| 110 | smp_processor_id(), cpu); | ||
| 111 | |||
| 112 | /* | ||
| 113 | * Reset it - in case this is a second bootup: | ||
| 114 | */ | ||
| 115 | atomic_set(&stop_count, 0); | ||
| 116 | |||
| 117 | /* | ||
| 118 | * Wait for the target to arrive: | ||
| 119 | */ | ||
| 120 | while (atomic_read(&start_count) != cpus-1) | ||
| 121 | cpu_relax(); | ||
| 122 | /* | ||
| 123 | * Trigger the target to continue into the measurement too: | ||
| 124 | */ | ||
| 125 | atomic_inc(&start_count); | ||
| 126 | |||
| 127 | check_tsc_warp(); | ||
| 128 | |||
| 129 | while (atomic_read(&stop_count) != cpus-1) | ||
| 130 | cpu_relax(); | ||
| 131 | |||
| 132 | /* | ||
| 133 | * Reset it - just in case we boot another CPU later: | ||
| 134 | */ | ||
| 135 | atomic_set(&start_count, 0); | ||
| 136 | |||
| 137 | if (nr_warps) { | ||
| 138 | printk("\n"); | ||
| 139 | printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs," | ||
| 140 | " turning off TSC clock.\n", max_warp); | ||
| 141 | mark_tsc_unstable(); | ||
| 142 | nr_warps = 0; | ||
| 143 | max_warp = 0; | ||
| 144 | last_tsc = 0; | ||
| 145 | } else { | ||
| 146 | printk(" passed.\n"); | ||
| 147 | } | ||
| 148 | |||
| 149 | /* | ||
| 150 | * Let the target continue with the bootup: | ||
| 151 | */ | ||
| 152 | atomic_inc(&stop_count); | ||
| 153 | } | ||
| 154 | |||
| 155 | /* | ||
| 156 | * Freshly booted CPUs call into this: | ||
| 157 | */ | ||
| 158 | void __cpuinit check_tsc_sync_target(void) | ||
| 159 | { | ||
| 160 | int cpus = 2; | ||
| 161 | |||
| 162 | if (unsynchronized_tsc()) | ||
| 163 | return; | ||
| 164 | |||
| 165 | /* | ||
| 166 | * Register this CPU's participation and wait for the | ||
| 167 | * source CPU to start the measurement: | ||
| 168 | */ | ||
| 169 | atomic_inc(&start_count); | ||
| 170 | while (atomic_read(&start_count) != cpus) | ||
| 171 | cpu_relax(); | ||
| 172 | |||
| 173 | check_tsc_warp(); | ||
| 174 | |||
| 175 | /* | ||
| 176 | * Ok, we are done: | ||
| 177 | */ | ||
| 178 | atomic_inc(&stop_count); | ||
| 179 | |||
| 180 | /* | ||
| 181 | * Wait for the source CPU to print stuff: | ||
| 182 | */ | ||
| 183 | while (atomic_read(&stop_count) != cpus) | ||
| 184 | cpu_relax(); | ||
| 185 | } | ||
| 186 | #undef NR_LOOPS | ||
| 187 | |||
diff --git a/include/asm-i386/tsc.h b/include/asm-i386/tsc.h
index c13933185c1c..e997891cc7cc 100644
--- a/include/asm-i386/tsc.h
+++ b/include/asm-i386/tsc.h
@@ -1,48 +1 @@
| 1 | /* | #include <asm-x86_64/tsc.h> | |
| 2 | * linux/include/asm-i386/tsc.h | ||
| 3 | * | ||
| 4 | * i386 TSC related functions | ||
| 5 | */ | ||
| 6 | #ifndef _ASM_i386_TSC_H | ||
| 7 | #define _ASM_i386_TSC_H | ||
| 8 | |||
| 9 | #include <asm/processor.h> | ||
| 10 | |||
| 11 | /* | ||
| 12 | * Standard way to access the cycle counter on i586+ CPUs. | ||
| 13 | * Currently only used on SMP. | ||
| 14 | * | ||
| 15 | * If you really have a SMP machine with i486 chips or older, | ||
| 16 | * compile for that, and this will just always return zero. | ||
| 17 | * That's ok, it just means that the nicer scheduling heuristics | ||
| 18 | * won't work for you. | ||
| 19 | * | ||
| 20 | * We only use the low 32 bits, and we'd simply better make sure | ||
| 21 | * that we reschedule before that wraps. Scheduling at least every | ||
| 22 | * four billion cycles just basically sounds like a good idea, | ||
| 23 | * regardless of how fast the machine is. | ||
| 24 | */ | ||
| 25 | typedef unsigned long long cycles_t; | ||
| 26 | |||
| 27 | extern unsigned int cpu_khz; | ||
| 28 | extern unsigned int tsc_khz; | ||
| 29 | |||
| 30 | static inline cycles_t get_cycles(void) | ||
| 31 | { | ||
| 32 | unsigned long long ret = 0; | ||
| 33 | |||
| 34 | #ifndef CONFIG_X86_TSC | ||
| 35 | if (!cpu_has_tsc) | ||
| 36 | return 0; | ||
| 37 | #endif | ||
| 38 | |||
| 39 | #if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC) | ||
| 40 | rdtscll(ret); | ||
| 41 | #endif | ||
| 42 | return ret; | ||
| 43 | } | ||
| 44 | |||
| 45 | extern void tsc_init(void); | ||
| 46 | extern void mark_tsc_unstable(void); | ||
| 47 | |||
| 48 | #endif | ||
diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h
index a6d2ff5c69b7..2ce3adf7bfdc 100644
--- a/include/asm-x86_64/proto.h
+++ b/include/asm-x86_64/proto.h
@@ -91,8 +91,6 @@ extern void check_efer(void);
| 91 | 91 | ||
| 92 | extern int unhandled_signal(struct task_struct *tsk, int sig); | 92 | extern int unhandled_signal(struct task_struct *tsk, int sig); |
| 93 | 93 | ||
| 94 | extern int unsynchronized_tsc(void); | ||
| 95 | |||
| 96 | extern void select_idle_routine(const struct cpuinfo_x86 *c); | 94 | extern void select_idle_routine(const struct cpuinfo_x86 *c); |
| 97 | 95 | ||
| 98 | extern unsigned long table_start, table_end; | 96 | extern unsigned long table_start, table_end; |
diff --git a/include/asm-x86_64/timex.h b/include/asm-x86_64/timex.h
index b9e5320b7625..a4493a77d641 100644
--- a/include/asm-x86_64/timex.h
+++ b/include/asm-x86_64/timex.h
@@ -12,35 +12,11 @@
| 12 | #include <asm/hpet.h> | 12 | #include <asm/hpet.h> |
| 13 | #include <asm/system.h> | 13 | #include <asm/system.h> |
| 14 | #include <asm/processor.h> | 14 | #include <asm/processor.h> |
| 15 | #include <asm/tsc.h> | ||
| 15 | #include <linux/compiler.h> | 16 | #include <linux/compiler.h> |
| 16 | 17 | ||
| 17 | #define CLOCK_TICK_RATE PIT_TICK_RATE /* Underlying HZ */ | 18 | #define CLOCK_TICK_RATE PIT_TICK_RATE /* Underlying HZ */ |
| 18 | 19 | ||
| 19 | typedef unsigned long long cycles_t; | ||
| 20 | |||
| 21 | static inline cycles_t get_cycles (void) | ||
| 22 | { | ||
| 23 | unsigned long long ret; | ||
| 24 | |||
| 25 | rdtscll(ret); | ||
| 26 | return ret; | ||
| 27 | } | ||
| 28 | |||
| 29 | /* Like get_cycles, but make sure the CPU is synchronized. */ | ||
| 30 | static __always_inline cycles_t get_cycles_sync(void) | ||
| 31 | { | ||
| 32 | unsigned long long ret; | ||
| 33 | unsigned eax; | ||
| 34 | /* Don't do an additional sync on CPUs where we know | ||
| 35 | RDTSC is already synchronous. */ | ||
| 36 | alternative_io("cpuid", ASM_NOP2, X86_FEATURE_SYNC_RDTSC, | ||
| 37 | "=a" (eax), "0" (1) : "ebx","ecx","edx","memory"); | ||
| 38 | rdtscll(ret); | ||
| 39 | return ret; | ||
| 40 | } | ||
| 41 | |||
| 42 | extern unsigned int cpu_khz; | ||
| 43 | |||
| 44 | extern int read_current_timer(unsigned long *timer_value); | 20 | extern int read_current_timer(unsigned long *timer_value); |
| 45 | #define ARCH_HAS_READ_CURRENT_TIMER 1 | 21 | #define ARCH_HAS_READ_CURRENT_TIMER 1 |
| 46 | 22 | ||
diff --git a/include/asm-x86_64/tsc.h b/include/asm-x86_64/tsc.h
new file mode 100644
index 000000000000..9a0a368852c7
--- /dev/null
+++ b/include/asm-x86_64/tsc.h
@@ -0,0 +1,66 @@
| 1 | /* | ||
| 2 | * linux/include/asm-x86_64/tsc.h | ||
| 3 | * | ||
| 4 | * x86_64 TSC related functions | ||
| 5 | */ | ||
| 6 | #ifndef _ASM_x86_64_TSC_H | ||
| 7 | #define _ASM_x86_64_TSC_H | ||
| 8 | |||
| 9 | #include <asm/processor.h> | ||
| 10 | |||
| 11 | /* | ||
| 12 | * Standard way to access the cycle counter. | ||
| 13 | */ | ||
| 14 | typedef unsigned long long cycles_t; | ||
| 15 | |||
| 16 | extern unsigned int cpu_khz; | ||
| 17 | extern unsigned int tsc_khz; | ||
| 18 | |||
| 19 | static inline cycles_t get_cycles(void) | ||
| 20 | { | ||
| 21 | unsigned long long ret = 0; | ||
| 22 | |||
| 23 | #ifndef CONFIG_X86_TSC | ||
| 24 | if (!cpu_has_tsc) | ||
| 25 | return 0; | ||
| 26 | #endif | ||
| 27 | |||
| 28 | #if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC) | ||
| 29 | rdtscll(ret); | ||
| 30 | #endif | ||
| 31 | return ret; | ||
| 32 | } | ||
| 33 | |||
| 34 | /* Like get_cycles, but make sure the CPU is synchronized. */ | ||
| 35 | static __always_inline cycles_t get_cycles_sync(void) | ||
| 36 | { | ||
| 37 | unsigned long long ret; | ||
| 38 | #ifdef X86_FEATURE_SYNC_RDTSC | ||
| 39 | unsigned eax; | ||
| 40 | |||
| 41 | /* | ||
| 42 | * Don't do an additional sync on CPUs where we know | ||
| 43 | * RDTSC is already synchronous: | ||
| 44 | */ | ||
| 45 | alternative_io("cpuid", ASM_NOP2, X86_FEATURE_SYNC_RDTSC, | ||
| 46 | "=a" (eax), "0" (1) : "ebx","ecx","edx","memory"); | ||
| 47 | #else | ||
| 48 | sync_core(); | ||
| 49 | #endif | ||
| 50 | rdtscll(ret); | ||
| 51 | |||
| 52 | return ret; | ||
| 53 | } | ||
| 54 | |||
| 55 | extern void tsc_init(void); | ||
| 56 | extern void mark_tsc_unstable(void); | ||
| 57 | extern int unsynchronized_tsc(void); | ||
| 58 | |||
| 59 | /* | ||
| 60 | * Boot-time check whether the TSCs are synchronized across | ||
| 61 | * all CPUs/cores: | ||
| 62 | */ | ||
| 63 | extern void check_tsc_sync_source(int cpu); | ||
| 64 | extern void check_tsc_sync_target(void); | ||
| 65 | |||
| 66 | #endif | ||
