diff options
author | Andy Lutomirski <luto@MIT.EDU> | 2011-05-23 09:31:26 -0400 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2011-05-24 08:51:28 -0400 |
commit | 3729db5ca2b2000c660e5a5d0eb68b1053212cab (patch) | |
tree | a8b038145eadd067f92798adb4292532eccedfba /arch/x86/kernel | |
parent | 057e6a8c660e95c3f4e7162e00e2fee1fc90c50d (diff) |
x86-64: Don't generate cmov in vread_tsc
vread_tsc checks whether rdtsc returns something less than
cycle_last, which is an extremely predictable branch. GCC likes
to generate a cmov anyway, which is several cycles slower than
a predicted branch. This saves a couple of nanoseconds.
Signed-off-by: Andy Lutomirski <luto@mit.edu>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Borislav Petkov <bp@amd64.org>
Link: http://lkml.kernel.org/r/%3C561280649519de41352fcb620684dfb22bad6bac.1306156808.git.luto%40mit.edu%3E
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/tsc.c | 18 |
1 files changed, 16 insertions, 2 deletions
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 1e6244202612..24249a5360b6 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -767,6 +767,7 @@ static cycle_t read_tsc(struct clocksource *cs) | |||
767 | static cycle_t __vsyscall_fn vread_tsc(void) | 767 | static cycle_t __vsyscall_fn vread_tsc(void) |
768 | { | 768 | { |
769 | cycle_t ret; | 769 | cycle_t ret; |
770 | u64 last; | ||
770 | 771 | ||
771 | /* | 772 | /* |
772 | * Empirically, a fence (of type that depends on the CPU) | 773 | * Empirically, a fence (of type that depends on the CPU) |
@@ -778,8 +779,21 @@ static cycle_t __vsyscall_fn vread_tsc(void) | |||
778 | rdtsc_barrier(); | 779 | rdtsc_barrier(); |
779 | ret = (cycle_t)vget_cycles(); | 780 | ret = (cycle_t)vget_cycles(); |
780 | 781 | ||
781 | return ret >= VVAR(vsyscall_gtod_data).clock.cycle_last ? | 782 | last = VVAR(vsyscall_gtod_data).clock.cycle_last; |
782 | ret : VVAR(vsyscall_gtod_data).clock.cycle_last; | 783 | |
784 | if (likely(ret >= last)) | ||
785 | return ret; | ||
786 | |||
787 | /* | ||
788 | * GCC likes to generate cmov here, but this branch is extremely | ||
789 | * predictable (it's just a function of time and the likely is | ||
790 | * very likely) and there's a data dependence, so force GCC | ||
791 | * to generate a branch instead. I don't barrier() because | ||
792 | * we don't actually need a barrier, and if this function | ||
793 | * ever gets inlined it will generate worse code. | ||
794 | */ | ||
795 | asm volatile (""); | ||
796 | return last; | ||
783 | } | 797 | } |
784 | #endif | 798 | #endif |
785 | 799 | ||