about summary refs log tree commit diff stats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
author	Andy Lutomirski <luto@MIT.EDU>	2011-05-23 09:31:26 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2011-05-24 08:51:28 -0400
commit	3729db5ca2b2000c660e5a5d0eb68b1053212cab (patch)
tree	a8b038145eadd067f92798adb4292532eccedfba /arch/x86/kernel
parent	057e6a8c660e95c3f4e7162e00e2fee1fc90c50d (diff)
x86-64: Don't generate cmov in vread_tsc
vread_tsc checks whether rdtsc returns something less than cycle_last, which is an extremely predictable branch. GCC likes to generate a cmov anyway, which is several cycles slower than a predicted branch. This saves a couple of nanoseconds. Signed-off-by: Andy Lutomirski <luto@mit.edu> Cc: Andi Kleen <andi@firstfloor.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: "David S. Miller" <davem@davemloft.net> Cc: Eric Dumazet <eric.dumazet@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Borislav Petkov <bp@amd64.org> Link: http://lkml.kernel.org/r/%3C561280649519de41352fcb620684dfb22bad6bac.1306156808.git.luto%40mit.edu%3E Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--	arch/x86/kernel/tsc.c	18
1 files changed, 16 insertions, 2 deletions
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 1e6244202612..24249a5360b6 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -767,6 +767,7 @@ static cycle_t read_tsc(struct clocksource *cs)
 static cycle_t __vsyscall_fn vread_tsc(void)
 {
 	cycle_t ret;
+	u64 last;
 
 	/*
 	 * Empirically, a fence (of type that depends on the CPU)
@@ -778,8 +779,21 @@ static cycle_t __vsyscall_fn vread_tsc(void)
 	rdtsc_barrier();
 	ret = (cycle_t)vget_cycles();
 
-	return ret >= VVAR(vsyscall_gtod_data).clock.cycle_last ?
-		ret : VVAR(vsyscall_gtod_data).clock.cycle_last;
+	last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+
+	if (likely(ret >= last))
+		return ret;
+
+	/*
+	 * GCC likes to generate cmov here, but this branch is extremely
+	 * predictable (it's just a funciton of time and the likely is
+	 * very likely) and there's a data dependence, so force GCC
+	 * to generate a branch instead. I don't barrier() because
+	 * we don't actually need a barrier, and if this function
+	 * ever gets inlined it will generate worse code.
+	 */
+	asm volatile ("");
+	return last;
 }
 #endif
 