aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Andy Lutomirski <luto@MIT.EDU> 2011-05-23 09:31:28 -0400
committer Thomas Gleixner <tglx@linutronix.de> 2011-05-24 08:51:29 -0400
commit 44259b1abfaa8bb819d25d41d71e8e33e25dd36a (patch)
tree 6255324c0bf2fef17ec3f7d92cdc88d9bbed44a3
parent 0f51f2852ccf0fe38a02d340d0ba625e8e32a863 (diff)
x86-64: Move vread_tsc into a new file with sensible options
vread_tsc is short and hot, and it's userspace code so the usual reasons to enable -pg and turn off sibling calls don't apply.

(OK, turning off sibling calls has no effect. But it might someday...)

As an added benefit, tsc.c is profilable now.

Signed-off-by: Andy Lutomirski <luto@mit.edu>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Borislav Petkov <bp@amd64.org>
Link: http://lkml.kernel.org/r/%3C99c6d7f5efa3ccb65b4ac6eb443e1ab7bad47d7b.1306156808.git.luto%40mit.edu%3E
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r-- arch/x86/include/asm/tsc.h | 4
-rw-r--r-- arch/x86/kernel/Makefile | 8
-rw-r--r-- arch/x86/kernel/tsc.c | 34
-rw-r--r-- arch/x86/kernel/vread_tsc_64.c | 36
4 files changed, 45 insertions, 37 deletions
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 83e2efd181e2..9db5583b6d38 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -51,6 +51,10 @@ extern int unsynchronized_tsc(void);
51extern int check_tsc_unstable(void); 51extern int check_tsc_unstable(void);
52extern unsigned long native_calibrate_tsc(void); 52extern unsigned long native_calibrate_tsc(void);
53 53
54#ifdef CONFIG_X86_64
55extern cycles_t vread_tsc(void);
56#endif
57
54/* 58/*
55 * Boot-time check whether the TSCs are synchronized across 59 * Boot-time check whether the TSCs are synchronized across
56 * all CPUs/cores: 60 * all CPUs/cores:
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 250806472a7e..f5abe3a245b8 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -8,7 +8,6 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
8 8
9ifdef CONFIG_FUNCTION_TRACER 9ifdef CONFIG_FUNCTION_TRACER
10# Do not profile debug and lowlevel utilities 10# Do not profile debug and lowlevel utilities
11CFLAGS_REMOVE_tsc.o = -pg
12CFLAGS_REMOVE_rtc.o = -pg 11CFLAGS_REMOVE_rtc.o = -pg
13CFLAGS_REMOVE_paravirt-spinlocks.o = -pg 12CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
14CFLAGS_REMOVE_pvclock.o = -pg 13CFLAGS_REMOVE_pvclock.o = -pg
@@ -24,13 +23,16 @@ endif
24nostackp := $(call cc-option, -fno-stack-protector) 23nostackp := $(call cc-option, -fno-stack-protector)
25CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) 24CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
26CFLAGS_hpet.o := $(nostackp) 25CFLAGS_hpet.o := $(nostackp)
27CFLAGS_tsc.o := $(nostackp) 26CFLAGS_vread_tsc_64.o := $(nostackp)
28CFLAGS_paravirt.o := $(nostackp) 27CFLAGS_paravirt.o := $(nostackp)
29GCOV_PROFILE_vsyscall_64.o := n 28GCOV_PROFILE_vsyscall_64.o := n
30GCOV_PROFILE_hpet.o := n 29GCOV_PROFILE_hpet.o := n
31GCOV_PROFILE_tsc.o := n 30GCOV_PROFILE_tsc.o := n
32GCOV_PROFILE_paravirt.o := n 31GCOV_PROFILE_paravirt.o := n
33 32
33# vread_tsc_64 is hot and should be fully optimized:
34CFLAGS_REMOVE_vread_tsc_64.o = -pg -fno-optimize-sibling-calls
35
34obj-y := process_$(BITS).o signal.o entry_$(BITS).o 36obj-y := process_$(BITS).o signal.o entry_$(BITS).o
35obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o 37obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
36obj-y += time.o ioport.o ldt.o dumpstack.o 38obj-y += time.o ioport.o ldt.o dumpstack.o
@@ -39,7 +41,7 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o
39obj-y += probe_roms.o 41obj-y += probe_roms.o
40obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o 42obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
41obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o 43obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
42obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o 44obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o vread_tsc_64.o
43obj-y += bootflag.o e820.o 45obj-y += bootflag.o e820.o
44obj-y += pci-dma.o quirks.o topology.o kdebugfs.o 46obj-y += pci-dma.o quirks.o topology.o kdebugfs.o
45obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o 47obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 24249a5360b6..6cc6922262af 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -763,40 +763,6 @@ static cycle_t read_tsc(struct clocksource *cs)
763 ret : clocksource_tsc.cycle_last; 763 ret : clocksource_tsc.cycle_last;
764} 764}
765 765
766#ifdef CONFIG_X86_64
767static cycle_t __vsyscall_fn vread_tsc(void)
768{
769 cycle_t ret;
770 u64 last;
771
772 /*
773 * Empirically, a fence (of type that depends on the CPU)
774 * before rdtsc is enough to ensure that rdtsc is ordered
775 * with respect to loads. The various CPU manuals are unclear
776 * as to whether rdtsc can be reordered with later loads,
777 * but no one has ever seen it happen.
778 */
779 rdtsc_barrier();
780 ret = (cycle_t)vget_cycles();
781
782 last = VVAR(vsyscall_gtod_data).clock.cycle_last;
783
784 if (likely(ret >= last))
785 return ret;
786
787 /*
788 * GCC likes to generate cmov here, but this branch is extremely
789 * predictable (it's just a function of time and the likely is
790 * very likely) and there's a data dependence, so force GCC
791 * to generate a branch instead. I don't barrier() because
792 * we don't actually need a barrier, and if this function
793 * ever gets inlined it will generate worse code.
794 */
795 asm volatile ("");
796 return last;
797}
798#endif
799
800static void resume_tsc(struct clocksource *cs) 766static void resume_tsc(struct clocksource *cs)
801{ 767{
802 clocksource_tsc.cycle_last = 0; 768 clocksource_tsc.cycle_last = 0;
diff --git a/arch/x86/kernel/vread_tsc_64.c b/arch/x86/kernel/vread_tsc_64.c
new file mode 100644
index 000000000000..a81aa9e9894c
--- /dev/null
+++ b/arch/x86/kernel/vread_tsc_64.c
@@ -0,0 +1,36 @@
1/* This code runs in userspace. */
2
3#define DISABLE_BRANCH_PROFILING
4#include <asm/vgtod.h>
5
6notrace cycle_t __vsyscall_fn vread_tsc(void)
7{
8 cycle_t ret;
9 u64 last;
10
11 /*
12 * Empirically, a fence (of type that depends on the CPU)
13 * before rdtsc is enough to ensure that rdtsc is ordered
14 * with respect to loads. The various CPU manuals are unclear
15 * as to whether rdtsc can be reordered with later loads,
16 * but no one has ever seen it happen.
17 */
18 rdtsc_barrier();
19 ret = (cycle_t)vget_cycles();
20
21 last = VVAR(vsyscall_gtod_data).clock.cycle_last;
22
23 if (likely(ret >= last))
24 return ret;
25
26 /*
27 * GCC likes to generate cmov here, but this branch is extremely
28 * predictable (it's just a function of time and the likely is
29 * very likely) and there's a data dependence, so force GCC
30 * to generate a branch instead. I don't barrier() because
31 * we don't actually need a barrier, and if this function
32 * ever gets inlined it will generate worse code.
33 */
34 asm volatile ("");
35 return last;
36}