diff options
author | Andy Lutomirski <luto@mit.edu> | 2011-07-14 06:47:22 -0400 |
---|---|---|
committer | H. Peter Anvin <hpa@linux.intel.com> | 2011-07-14 20:57:05 -0400 |
commit | 98d0ac38ca7b1b7a552c9a2359174ff84decb600 (patch) | |
tree | 0c244e828f86c779c348a4888ed9e303c3e59811 | |
parent | 433bd805e5fd2c731b3a9025b034f066272d336e (diff) |
x86-64: Move vread_tsc and vread_hpet into the vDSO
The vsyscall page now consists entirely of trap instructions.
Cc: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Andy Lutomirski <luto@mit.edu>
Link: http://lkml.kernel.org/r/637648f303f2ef93af93bae25186e9a1bea093f5.1310639973.git.luto@mit.edu
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
-rw-r--r-- | arch/x86/include/asm/clocksource.h | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/tsc.h | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/vgtod.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/vsyscall.h | 4 | ||||
-rw-r--r-- | arch/x86/kernel/Makefile | 7 | ||||
-rw-r--r-- | arch/x86/kernel/alternative.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/hpet.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/tsc.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/vmlinux.lds.S | 3 | ||||
-rw-r--r-- | arch/x86/kernel/vread_tsc_64.c | 36 | ||||
-rw-r--r-- | arch/x86/kernel/vsyscall_64.c | 2 | ||||
-rw-r--r-- | arch/x86/vdso/vclock_gettime.c | 53 |
12 files changed, 57 insertions, 79 deletions
diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h index a5df33f614c9..3882c65dc19b 100644 --- a/arch/x86/include/asm/clocksource.h +++ b/arch/x86/include/asm/clocksource.h | |||
@@ -7,8 +7,12 @@ | |||
7 | 7 | ||
8 | #define __ARCH_HAS_CLOCKSOURCE_DATA | 8 | #define __ARCH_HAS_CLOCKSOURCE_DATA |
9 | 9 | ||
10 | #define VCLOCK_NONE 0 /* No vDSO clock available. */ | ||
11 | #define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ | ||
12 | #define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */ | ||
13 | |||
10 | struct arch_clocksource_data { | 14 | struct arch_clocksource_data { |
11 | cycle_t (*vread)(void); | 15 | int vclock_mode; |
12 | }; | 16 | }; |
13 | 17 | ||
14 | #endif /* CONFIG_X86_64 */ | 18 | #endif /* CONFIG_X86_64 */ |
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 9db5583b6d38..83e2efd181e2 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h | |||
@@ -51,10 +51,6 @@ extern int unsynchronized_tsc(void); | |||
51 | extern int check_tsc_unstable(void); | 51 | extern int check_tsc_unstable(void); |
52 | extern unsigned long native_calibrate_tsc(void); | 52 | extern unsigned long native_calibrate_tsc(void); |
53 | 53 | ||
54 | #ifdef CONFIG_X86_64 | ||
55 | extern cycles_t vread_tsc(void); | ||
56 | #endif | ||
57 | |||
58 | /* | 54 | /* |
59 | * Boot-time check whether the TSCs are synchronized across | 55 | * Boot-time check whether the TSCs are synchronized across |
60 | * all CPUs/cores: | 56 | * all CPUs/cores: |
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index aa5add855a91..815285bcaceb 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h | |||
@@ -13,7 +13,7 @@ struct vsyscall_gtod_data { | |||
13 | 13 | ||
14 | struct timezone sys_tz; | 14 | struct timezone sys_tz; |
15 | struct { /* extract of a clocksource struct */ | 15 | struct { /* extract of a clocksource struct */ |
16 | cycle_t (*vread)(void); | 16 | int vclock_mode; |
17 | cycle_t cycle_last; | 17 | cycle_t cycle_last; |
18 | cycle_t mask; | 18 | cycle_t mask; |
19 | u32 mult; | 19 | u32 mult; |
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index d55597351f6a..60107072c28b 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h | |||
@@ -16,10 +16,6 @@ enum vsyscall_num { | |||
16 | #ifdef __KERNEL__ | 16 | #ifdef __KERNEL__ |
17 | #include <linux/seqlock.h> | 17 | #include <linux/seqlock.h> |
18 | 18 | ||
19 | /* Definitions for CONFIG_GENERIC_TIME definitions */ | ||
20 | #define __vsyscall_fn \ | ||
21 | __attribute__ ((unused, __section__(".vsyscall_fn"))) notrace | ||
22 | |||
23 | #define VGETCPU_RDTSCP 1 | 19 | #define VGETCPU_RDTSCP 1 |
24 | #define VGETCPU_LSL 2 | 20 | #define VGETCPU_LSL 2 |
25 | 21 | ||
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index cc0469a65120..2deef3d2435a 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -24,17 +24,12 @@ endif | |||
24 | nostackp := $(call cc-option, -fno-stack-protector) | 24 | nostackp := $(call cc-option, -fno-stack-protector) |
25 | CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) | 25 | CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) |
26 | CFLAGS_hpet.o := $(nostackp) | 26 | CFLAGS_hpet.o := $(nostackp) |
27 | CFLAGS_vread_tsc_64.o := $(nostackp) | ||
28 | CFLAGS_paravirt.o := $(nostackp) | 27 | CFLAGS_paravirt.o := $(nostackp) |
29 | GCOV_PROFILE_vsyscall_64.o := n | 28 | GCOV_PROFILE_vsyscall_64.o := n |
30 | GCOV_PROFILE_hpet.o := n | 29 | GCOV_PROFILE_hpet.o := n |
31 | GCOV_PROFILE_tsc.o := n | 30 | GCOV_PROFILE_tsc.o := n |
32 | GCOV_PROFILE_vread_tsc_64.o := n | ||
33 | GCOV_PROFILE_paravirt.o := n | 31 | GCOV_PROFILE_paravirt.o := n |
34 | 32 | ||
35 | # vread_tsc_64 is hot and should be fully optimized: | ||
36 | CFLAGS_REMOVE_vread_tsc_64.o = -pg -fno-optimize-sibling-calls | ||
37 | |||
38 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o | 33 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o |
39 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o | 34 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o |
40 | obj-y += time.o ioport.o ldt.o dumpstack.o | 35 | obj-y += time.o ioport.o ldt.o dumpstack.o |
@@ -43,7 +38,7 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o | |||
43 | obj-y += probe_roms.o | 38 | obj-y += probe_roms.o |
44 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o | 39 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o |
45 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o | 40 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o |
46 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o vread_tsc_64.o | 41 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o |
47 | obj-$(CONFIG_X86_64) += vsyscall_emu_64.o | 42 | obj-$(CONFIG_X86_64) += vsyscall_emu_64.o |
48 | obj-y += bootflag.o e820.o | 43 | obj-y += bootflag.o e820.o |
49 | obj-y += pci-dma.o quirks.o topology.o kdebugfs.o | 44 | obj-y += pci-dma.o quirks.o topology.o kdebugfs.o |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index ddb207bb5f91..c63822816249 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -14,7 +14,6 @@ | |||
14 | #include <asm/pgtable.h> | 14 | #include <asm/pgtable.h> |
15 | #include <asm/mce.h> | 15 | #include <asm/mce.h> |
16 | #include <asm/nmi.h> | 16 | #include <asm/nmi.h> |
17 | #include <asm/vsyscall.h> | ||
18 | #include <asm/cacheflush.h> | 17 | #include <asm/cacheflush.h> |
19 | #include <asm/tlbflush.h> | 18 | #include <asm/tlbflush.h> |
20 | #include <asm/io.h> | 19 | #include <asm/io.h> |
@@ -250,7 +249,6 @@ static void __init_or_module add_nops(void *insns, unsigned int len) | |||
250 | 249 | ||
251 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; | 250 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; |
252 | extern s32 __smp_locks[], __smp_locks_end[]; | 251 | extern s32 __smp_locks[], __smp_locks_end[]; |
253 | extern char __vsyscall_0; | ||
254 | void *text_poke_early(void *addr, const void *opcode, size_t len); | 252 | void *text_poke_early(void *addr, const void *opcode, size_t len); |
255 | 253 | ||
256 | /* Replace instructions with better alternatives for this CPU type. | 254 | /* Replace instructions with better alternatives for this CPU type. |
@@ -294,12 +292,6 @@ void __init_or_module apply_alternatives(struct alt_instr *start, | |||
294 | add_nops(insnbuf + a->replacementlen, | 292 | add_nops(insnbuf + a->replacementlen, |
295 | a->instrlen - a->replacementlen); | 293 | a->instrlen - a->replacementlen); |
296 | 294 | ||
297 | #ifdef CONFIG_X86_64 | ||
298 | /* vsyscall code is not mapped yet. resolve it manually. */ | ||
299 | if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) { | ||
300 | instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0)); | ||
301 | } | ||
302 | #endif | ||
303 | text_poke_early(instr, insnbuf, a->instrlen); | 295 | text_poke_early(instr, insnbuf, a->instrlen); |
304 | } | 296 | } |
305 | } | 297 | } |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 0e07257bb389..d10cc009845f 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -738,13 +738,6 @@ static cycle_t read_hpet(struct clocksource *cs) | |||
738 | return (cycle_t)hpet_readl(HPET_COUNTER); | 738 | return (cycle_t)hpet_readl(HPET_COUNTER); |
739 | } | 739 | } |
740 | 740 | ||
741 | #ifdef CONFIG_X86_64 | ||
742 | static cycle_t __vsyscall_fn vread_hpet(void) | ||
743 | { | ||
744 | return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); | ||
745 | } | ||
746 | #endif | ||
747 | |||
748 | static struct clocksource clocksource_hpet = { | 741 | static struct clocksource clocksource_hpet = { |
749 | .name = "hpet", | 742 | .name = "hpet", |
750 | .rating = 250, | 743 | .rating = 250, |
@@ -753,7 +746,7 @@ static struct clocksource clocksource_hpet = { | |||
753 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 746 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
754 | .resume = hpet_resume_counter, | 747 | .resume = hpet_resume_counter, |
755 | #ifdef CONFIG_X86_64 | 748 | #ifdef CONFIG_X86_64 |
756 | .archdata = { .vread = vread_hpet }, | 749 | .archdata = { .vclock_mode = VCLOCK_HPET }, |
757 | #endif | 750 | #endif |
758 | }; | 751 | }; |
759 | 752 | ||
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index e7a74b889ab3..56c633a5db72 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -777,7 +777,7 @@ static struct clocksource clocksource_tsc = { | |||
777 | .flags = CLOCK_SOURCE_IS_CONTINUOUS | | 777 | .flags = CLOCK_SOURCE_IS_CONTINUOUS | |
778 | CLOCK_SOURCE_MUST_VERIFY, | 778 | CLOCK_SOURCE_MUST_VERIFY, |
779 | #ifdef CONFIG_X86_64 | 779 | #ifdef CONFIG_X86_64 |
780 | .archdata = { .vread = vread_tsc }, | 780 | .archdata = { .vclock_mode = VCLOCK_TSC }, |
781 | #endif | 781 | #endif |
782 | }; | 782 | }; |
783 | 783 | ||
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 80174719910c..4aa9c54a9b76 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -169,9 +169,6 @@ SECTIONS | |||
169 | .vsyscall : AT(VLOAD(.vsyscall)) { | 169 | .vsyscall : AT(VLOAD(.vsyscall)) { |
170 | *(.vsyscall_0) | 170 | *(.vsyscall_0) |
171 | 171 | ||
172 | . = ALIGN(L1_CACHE_BYTES); | ||
173 | *(.vsyscall_fn) | ||
174 | |||
175 | . = 1024; | 172 | . = 1024; |
176 | *(.vsyscall_1) | 173 | *(.vsyscall_1) |
177 | 174 | ||
diff --git a/arch/x86/kernel/vread_tsc_64.c b/arch/x86/kernel/vread_tsc_64.c deleted file mode 100644 index a81aa9e9894c..000000000000 --- a/arch/x86/kernel/vread_tsc_64.c +++ /dev/null | |||
@@ -1,36 +0,0 @@ | |||
1 | /* This code runs in userspace. */ | ||
2 | |||
3 | #define DISABLE_BRANCH_PROFILING | ||
4 | #include <asm/vgtod.h> | ||
5 | |||
6 | notrace cycle_t __vsyscall_fn vread_tsc(void) | ||
7 | { | ||
8 | cycle_t ret; | ||
9 | u64 last; | ||
10 | |||
11 | /* | ||
12 | * Empirically, a fence (of type that depends on the CPU) | ||
13 | * before rdtsc is enough to ensure that rdtsc is ordered | ||
14 | * with respect to loads. The various CPU manuals are unclear | ||
15 | * as to whether rdtsc can be reordered with later loads, | ||
16 | * but no one has ever seen it happen. | ||
17 | */ | ||
18 | rdtsc_barrier(); | ||
19 | ret = (cycle_t)vget_cycles(); | ||
20 | |||
21 | last = VVAR(vsyscall_gtod_data).clock.cycle_last; | ||
22 | |||
23 | if (likely(ret >= last)) | ||
24 | return ret; | ||
25 | |||
26 | /* | ||
27 | * GCC likes to generate cmov here, but this branch is extremely | ||
28 | | * predictable (it's just a function of time and the likely is | ||
29 | * very likely) and there's a data dependence, so force GCC | ||
30 | * to generate a branch instead. I don't barrier() because | ||
31 | * we don't actually need a barrier, and if this function | ||
32 | * ever gets inlined it will generate worse code. | ||
33 | */ | ||
34 | asm volatile (""); | ||
35 | return last; | ||
36 | } | ||
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 12d488fd95d9..dda7dff9cef7 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -74,7 +74,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, | |||
74 | write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); | 74 | write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); |
75 | 75 | ||
76 | /* copy vsyscall data */ | 76 | /* copy vsyscall data */ |
77 | vsyscall_gtod_data.clock.vread = clock->archdata.vread; | 77 | vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode; |
78 | vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; | 78 | vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; |
79 | vsyscall_gtod_data.clock.mask = clock->mask; | 79 | vsyscall_gtod_data.clock.mask = clock->mask; |
80 | vsyscall_gtod_data.clock.mult = mult; | 80 | vsyscall_gtod_data.clock.mult = mult; |
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index cf54813ac527..8792d6e0a2c3 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/time.h> | 17 | #include <linux/time.h> |
18 | #include <linux/string.h> | 18 | #include <linux/string.h> |
19 | #include <asm/vsyscall.h> | 19 | #include <asm/vsyscall.h> |
20 | #include <asm/fixmap.h> | ||
20 | #include <asm/vgtod.h> | 21 | #include <asm/vgtod.h> |
21 | #include <asm/timex.h> | 22 | #include <asm/timex.h> |
22 | #include <asm/hpet.h> | 23 | #include <asm/hpet.h> |
@@ -25,6 +26,43 @@ | |||
25 | 26 | ||
26 | #define gtod (&VVAR(vsyscall_gtod_data)) | 27 | #define gtod (&VVAR(vsyscall_gtod_data)) |
27 | 28 | ||
29 | notrace static cycle_t vread_tsc(void) | ||
30 | { | ||
31 | cycle_t ret; | ||
32 | u64 last; | ||
33 | |||
34 | /* | ||
35 | * Empirically, a fence (of type that depends on the CPU) | ||
36 | * before rdtsc is enough to ensure that rdtsc is ordered | ||
37 | * with respect to loads. The various CPU manuals are unclear | ||
38 | * as to whether rdtsc can be reordered with later loads, | ||
39 | * but no one has ever seen it happen. | ||
40 | */ | ||
41 | rdtsc_barrier(); | ||
42 | ret = (cycle_t)vget_cycles(); | ||
43 | |||
44 | last = VVAR(vsyscall_gtod_data).clock.cycle_last; | ||
45 | |||
46 | if (likely(ret >= last)) | ||
47 | return ret; | ||
48 | |||
49 | /* | ||
50 | * GCC likes to generate cmov here, but this branch is extremely | ||
51 | | * predictable (it's just a function of time and the likely is | ||
52 | * very likely) and there's a data dependence, so force GCC | ||
53 | * to generate a branch instead. I don't barrier() because | ||
54 | * we don't actually need a barrier, and if this function | ||
55 | * ever gets inlined it will generate worse code. | ||
56 | */ | ||
57 | asm volatile (""); | ||
58 | return last; | ||
59 | } | ||
60 | |||
61 | static notrace cycle_t vread_hpet(void) | ||
62 | { | ||
63 | return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); | ||
64 | } | ||
65 | |||
28 | notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) | 66 | notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) |
29 | { | 67 | { |
30 | long ret; | 68 | long ret; |
@@ -36,9 +74,12 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) | |||
36 | notrace static inline long vgetns(void) | 74 | notrace static inline long vgetns(void) |
37 | { | 75 | { |
38 | long v; | 76 | long v; |
39 | cycles_t (*vread)(void); | 77 | cycles_t cycles; |
40 | vread = gtod->clock.vread; | 78 | if (gtod->clock.vclock_mode == VCLOCK_TSC) |
41 | v = (vread() - gtod->clock.cycle_last) & gtod->clock.mask; | 79 | cycles = vread_tsc(); |
80 | else | ||
81 | cycles = vread_hpet(); | ||
82 | v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask; | ||
42 | return (v * gtod->clock.mult) >> gtod->clock.shift; | 83 | return (v * gtod->clock.mult) >> gtod->clock.shift; |
43 | } | 84 | } |
44 | 85 | ||
@@ -118,11 +159,11 @@ notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) | |||
118 | { | 159 | { |
119 | switch (clock) { | 160 | switch (clock) { |
120 | case CLOCK_REALTIME: | 161 | case CLOCK_REALTIME: |
121 | if (likely(gtod->clock.vread)) | 162 | if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) |
122 | return do_realtime(ts); | 163 | return do_realtime(ts); |
123 | break; | 164 | break; |
124 | case CLOCK_MONOTONIC: | 165 | case CLOCK_MONOTONIC: |
125 | if (likely(gtod->clock.vread)) | 166 | if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) |
126 | return do_monotonic(ts); | 167 | return do_monotonic(ts); |
127 | break; | 168 | break; |
128 | case CLOCK_REALTIME_COARSE: | 169 | case CLOCK_REALTIME_COARSE: |
@@ -139,7 +180,7 @@ int clock_gettime(clockid_t, struct timespec *) | |||
139 | notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) | 180 | notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) |
140 | { | 181 | { |
141 | long ret; | 182 | long ret; |
142 | if (likely(gtod->clock.vread)) { | 183 | if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) { |
143 | if (likely(tv != NULL)) { | 184 | if (likely(tv != NULL)) { |
144 | BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != | 185 | BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != |
145 | offsetof(struct timespec, tv_nsec) || | 186 | offsetof(struct timespec, tv_nsec) || |