aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/tsc.c
diff options
context:
space:
mode:
author: Andy Lutomirski <luto@MIT.EDU> 2011-05-23 09:31:25 -0400
committer: Thomas Gleixner <tglx@linutronix.de> 2011-05-24 08:51:28 -0400
commit: 057e6a8c660e95c3f4e7162e00e2fee1fc90c50d (patch)
tree: 5c0a81327964affd44137754b64fbaf93ceba6fa /arch/x86/kernel/tsc.c
parent: 8c49d9a74bac5ea3f18480307057241b808fcc0c (diff)
x86-64: Remove unnecessary barrier in vread_tsc
RDTSC is completely unordered on modern Intel and AMD CPUs. The Intel manual says that lfence;rdtsc causes all previous instructions to complete before the tsc is read, and the AMD manual says to use mfence;rdtsc to do the same thing.

From a decent amount of testing [1] this is enough to make rdtsc be ordered with respect to subsequent loads across a wide variety of CPUs.

On Sandy Bridge (i7-2600), this improves a loop of clock_gettime(CLOCK_MONOTONIC) by more than 5 ns/iter.

[1] https://lkml.org/lkml/2011/4/18/350

Signed-off-by: Andy Lutomirski <luto@mit.edu>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Borislav Petkov <bp@amd64.org>
Link: http://lkml.kernel.org/r/%3C1c158b9d74338aa5361f96dd473d0e6a58235302.1306156808.git.luto%40mit.edu%3E
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86/kernel/tsc.c')
-rw-r--r-- arch/x86/kernel/tsc.c | 9
1 files changed, 5 insertions, 4 deletions
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index db697b81b8b1..1e6244202612 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -769,13 +769,14 @@ static cycle_t __vsyscall_fn vread_tsc(void)
769 cycle_t ret; 769 cycle_t ret;
770 770
771 /* 771 /*
772 * Surround the RDTSC by barriers, to make sure it's not 772 * Empirically, a fence (of type that depends on the CPU)
773 * speculated to outside the seqlock critical section and 773 * before rdtsc is enough to ensure that rdtsc is ordered
774 * does not cause time warps: 774 * with respect to loads. The various CPU manuals are unclear
775 * as to whether rdtsc can be reordered with later loads,
776 * but no one has ever seen it happen.
775 */ 777 */
776 rdtsc_barrier(); 778 rdtsc_barrier();
777 ret = (cycle_t)vget_cycles(); 779 ret = (cycle_t)vget_cycles();
778 rdtsc_barrier();
779 780
780 return ret >= VVAR(vsyscall_gtod_data).clock.cycle_last ? 781 return ret >= VVAR(vsyscall_gtod_data).clock.cycle_last ?
781 ret : VVAR(vsyscall_gtod_data).clock.cycle_last; 782 ret : VVAR(vsyscall_gtod_data).clock.cycle_last;