aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Andy Lutomirski <luto@mit.edu> 2011-07-14 06:47:22 -0400
committer H. Peter Anvin <hpa@linux.intel.com> 2011-07-14 20:57:05 -0400
commit 98d0ac38ca7b1b7a552c9a2359174ff84decb600 (patch)
tree 0c244e828f86c779c348a4888ed9e303c3e59811
parent 433bd805e5fd2c731b3a9025b034f066272d336e (diff)
x86-64: Move vread_tsc and vread_hpet into the vDSO
The vsyscall page now consists entirely of trap instructions.

Cc: John Stultz <johnstul@us.ibm.com>
Signed-off-by: Andy Lutomirski <luto@mit.edu>
Link: http://lkml.kernel.org/r/637648f303f2ef93af93bae25186e9a1bea093f5.1310639973.git.luto@mit.edu
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
-rw-r--r-- arch/x86/include/asm/clocksource.h 6
-rw-r--r-- arch/x86/include/asm/tsc.h 4
-rw-r--r-- arch/x86/include/asm/vgtod.h 2
-rw-r--r-- arch/x86/include/asm/vsyscall.h 4
-rw-r--r-- arch/x86/kernel/Makefile 7
-rw-r--r-- arch/x86/kernel/alternative.c 8
-rw-r--r-- arch/x86/kernel/hpet.c 9
-rw-r--r-- arch/x86/kernel/tsc.c 2
-rw-r--r-- arch/x86/kernel/vmlinux.lds.S 3
-rw-r--r-- arch/x86/kernel/vread_tsc_64.c 36
-rw-r--r-- arch/x86/kernel/vsyscall_64.c 2
-rw-r--r-- arch/x86/vdso/vclock_gettime.c 53
12 files changed, 57 insertions, 79 deletions
diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h
index a5df33f614c9..3882c65dc19b 100644
--- a/arch/x86/include/asm/clocksource.h
+++ b/arch/x86/include/asm/clocksource.h
@@ -7,8 +7,12 @@
7 7
8#define __ARCH_HAS_CLOCKSOURCE_DATA 8#define __ARCH_HAS_CLOCKSOURCE_DATA
9 9
10#define VCLOCK_NONE 0 /* No vDSO clock available. */
11#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
12#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
13
10struct arch_clocksource_data { 14struct arch_clocksource_data {
11 cycle_t (*vread)(void); 15 int vclock_mode;
12}; 16};
13 17
14#endif /* CONFIG_X86_64 */ 18#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 9db5583b6d38..83e2efd181e2 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -51,10 +51,6 @@ extern int unsynchronized_tsc(void);
51extern int check_tsc_unstable(void); 51extern int check_tsc_unstable(void);
52extern unsigned long native_calibrate_tsc(void); 52extern unsigned long native_calibrate_tsc(void);
53 53
54#ifdef CONFIG_X86_64
55extern cycles_t vread_tsc(void);
56#endif
57
58/* 54/*
59 * Boot-time check whether the TSCs are synchronized across 55 * Boot-time check whether the TSCs are synchronized across
60 * all CPUs/cores: 56 * all CPUs/cores:
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index aa5add855a91..815285bcaceb 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -13,7 +13,7 @@ struct vsyscall_gtod_data {
13 13
14 struct timezone sys_tz; 14 struct timezone sys_tz;
15 struct { /* extract of a clocksource struct */ 15 struct { /* extract of a clocksource struct */
16 cycle_t (*vread)(void); 16 int vclock_mode;
17 cycle_t cycle_last; 17 cycle_t cycle_last;
18 cycle_t mask; 18 cycle_t mask;
19 u32 mult; 19 u32 mult;
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index d55597351f6a..60107072c28b 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -16,10 +16,6 @@ enum vsyscall_num {
16#ifdef __KERNEL__ 16#ifdef __KERNEL__
17#include <linux/seqlock.h> 17#include <linux/seqlock.h>
18 18
19/* Definitions for CONFIG_GENERIC_TIME definitions */
20#define __vsyscall_fn \
21 __attribute__ ((unused, __section__(".vsyscall_fn"))) notrace
22
23#define VGETCPU_RDTSCP 1 19#define VGETCPU_RDTSCP 1
24#define VGETCPU_LSL 2 20#define VGETCPU_LSL 2
25 21
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index cc0469a65120..2deef3d2435a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -24,17 +24,12 @@ endif
24nostackp := $(call cc-option, -fno-stack-protector) 24nostackp := $(call cc-option, -fno-stack-protector)
25CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) 25CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
26CFLAGS_hpet.o := $(nostackp) 26CFLAGS_hpet.o := $(nostackp)
27CFLAGS_vread_tsc_64.o := $(nostackp)
28CFLAGS_paravirt.o := $(nostackp) 27CFLAGS_paravirt.o := $(nostackp)
29GCOV_PROFILE_vsyscall_64.o := n 28GCOV_PROFILE_vsyscall_64.o := n
30GCOV_PROFILE_hpet.o := n 29GCOV_PROFILE_hpet.o := n
31GCOV_PROFILE_tsc.o := n 30GCOV_PROFILE_tsc.o := n
32GCOV_PROFILE_vread_tsc_64.o := n
33GCOV_PROFILE_paravirt.o := n 31GCOV_PROFILE_paravirt.o := n
34 32
35# vread_tsc_64 is hot and should be fully optimized:
36CFLAGS_REMOVE_vread_tsc_64.o = -pg -fno-optimize-sibling-calls
37
38obj-y := process_$(BITS).o signal.o entry_$(BITS).o 33obj-y := process_$(BITS).o signal.o entry_$(BITS).o
39obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o 34obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
40obj-y += time.o ioport.o ldt.o dumpstack.o 35obj-y += time.o ioport.o ldt.o dumpstack.o
@@ -43,7 +38,7 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o
43obj-y += probe_roms.o 38obj-y += probe_roms.o
44obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o 39obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
45obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o 40obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
46obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o vread_tsc_64.o 41obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
47obj-$(CONFIG_X86_64) += vsyscall_emu_64.o 42obj-$(CONFIG_X86_64) += vsyscall_emu_64.o
48obj-y += bootflag.o e820.o 43obj-y += bootflag.o e820.o
49obj-y += pci-dma.o quirks.o topology.o kdebugfs.o 44obj-y += pci-dma.o quirks.o topology.o kdebugfs.o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index ddb207bb5f91..c63822816249 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -14,7 +14,6 @@
14#include <asm/pgtable.h> 14#include <asm/pgtable.h>
15#include <asm/mce.h> 15#include <asm/mce.h>
16#include <asm/nmi.h> 16#include <asm/nmi.h>
17#include <asm/vsyscall.h>
18#include <asm/cacheflush.h> 17#include <asm/cacheflush.h>
19#include <asm/tlbflush.h> 18#include <asm/tlbflush.h>
20#include <asm/io.h> 19#include <asm/io.h>
@@ -250,7 +249,6 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
250 249
251extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; 250extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
252extern s32 __smp_locks[], __smp_locks_end[]; 251extern s32 __smp_locks[], __smp_locks_end[];
253extern char __vsyscall_0;
254void *text_poke_early(void *addr, const void *opcode, size_t len); 252void *text_poke_early(void *addr, const void *opcode, size_t len);
255 253
256/* Replace instructions with better alternatives for this CPU type. 254/* Replace instructions with better alternatives for this CPU type.
@@ -294,12 +292,6 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
294 add_nops(insnbuf + a->replacementlen, 292 add_nops(insnbuf + a->replacementlen,
295 a->instrlen - a->replacementlen); 293 a->instrlen - a->replacementlen);
296 294
297#ifdef CONFIG_X86_64
298 /* vsyscall code is not mapped yet. resolve it manually. */
299 if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
300 instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
301 }
302#endif
303 text_poke_early(instr, insnbuf, a->instrlen); 295 text_poke_early(instr, insnbuf, a->instrlen);
304 } 296 }
305} 297}
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 0e07257bb389..d10cc009845f 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -738,13 +738,6 @@ static cycle_t read_hpet(struct clocksource *cs)
738 return (cycle_t)hpet_readl(HPET_COUNTER); 738 return (cycle_t)hpet_readl(HPET_COUNTER);
739} 739}
740 740
741#ifdef CONFIG_X86_64
742static cycle_t __vsyscall_fn vread_hpet(void)
743{
744 return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0);
745}
746#endif
747
748static struct clocksource clocksource_hpet = { 741static struct clocksource clocksource_hpet = {
749 .name = "hpet", 742 .name = "hpet",
750 .rating = 250, 743 .rating = 250,
@@ -753,7 +746,7 @@ static struct clocksource clocksource_hpet = {
753 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 746 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
754 .resume = hpet_resume_counter, 747 .resume = hpet_resume_counter,
755#ifdef CONFIG_X86_64 748#ifdef CONFIG_X86_64
756 .archdata = { .vread = vread_hpet }, 749 .archdata = { .vclock_mode = VCLOCK_HPET },
757#endif 750#endif
758}; 751};
759 752
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index e7a74b889ab3..56c633a5db72 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -777,7 +777,7 @@ static struct clocksource clocksource_tsc = {
777 .flags = CLOCK_SOURCE_IS_CONTINUOUS | 777 .flags = CLOCK_SOURCE_IS_CONTINUOUS |
778 CLOCK_SOURCE_MUST_VERIFY, 778 CLOCK_SOURCE_MUST_VERIFY,
779#ifdef CONFIG_X86_64 779#ifdef CONFIG_X86_64
780 .archdata = { .vread = vread_tsc }, 780 .archdata = { .vclock_mode = VCLOCK_TSC },
781#endif 781#endif
782}; 782};
783 783
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 80174719910c..4aa9c54a9b76 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -169,9 +169,6 @@ SECTIONS
169 .vsyscall : AT(VLOAD(.vsyscall)) { 169 .vsyscall : AT(VLOAD(.vsyscall)) {
170 *(.vsyscall_0) 170 *(.vsyscall_0)
171 171
172 . = ALIGN(L1_CACHE_BYTES);
173 *(.vsyscall_fn)
174
175 . = 1024; 172 . = 1024;
176 *(.vsyscall_1) 173 *(.vsyscall_1)
177 174
diff --git a/arch/x86/kernel/vread_tsc_64.c b/arch/x86/kernel/vread_tsc_64.c
deleted file mode 100644
index a81aa9e9894c..000000000000
--- a/arch/x86/kernel/vread_tsc_64.c
+++ /dev/null
@@ -1,36 +0,0 @@
1/* This code runs in userspace. */
2
3#define DISABLE_BRANCH_PROFILING
4#include <asm/vgtod.h>
5
6notrace cycle_t __vsyscall_fn vread_tsc(void)
7{
8 cycle_t ret;
9 u64 last;
10
11 /*
12 * Empirically, a fence (of type that depends on the CPU)
13 * before rdtsc is enough to ensure that rdtsc is ordered
14 * with respect to loads. The various CPU manuals are unclear
15 * as to whether rdtsc can be reordered with later loads,
16 * but no one has ever seen it happen.
17 */
18 rdtsc_barrier();
19 ret = (cycle_t)vget_cycles();
20
21 last = VVAR(vsyscall_gtod_data).clock.cycle_last;
22
23 if (likely(ret >= last))
24 return ret;
25
26 /*
27 * GCC likes to generate cmov here, but this branch is extremely
28 * predictable (it's just a funciton of time and the likely is
29 * very likely) and there's a data dependence, so force GCC
30 * to generate a branch instead. I don't barrier() because
31 * we don't actually need a barrier, and if this function
32 * ever gets inlined it will generate worse code.
33 */
34 asm volatile ("");
35 return last;
36}
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 12d488fd95d9..dda7dff9cef7 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -74,7 +74,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
74 write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); 74 write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
75 75
76 /* copy vsyscall data */ 76 /* copy vsyscall data */
77 vsyscall_gtod_data.clock.vread = clock->archdata.vread; 77 vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode;
78 vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; 78 vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
79 vsyscall_gtod_data.clock.mask = clock->mask; 79 vsyscall_gtod_data.clock.mask = clock->mask;
80 vsyscall_gtod_data.clock.mult = mult; 80 vsyscall_gtod_data.clock.mult = mult;
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index cf54813ac527..8792d6e0a2c3 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -17,6 +17,7 @@
17#include <linux/time.h> 17#include <linux/time.h>
18#include <linux/string.h> 18#include <linux/string.h>
19#include <asm/vsyscall.h> 19#include <asm/vsyscall.h>
20#include <asm/fixmap.h>
20#include <asm/vgtod.h> 21#include <asm/vgtod.h>
21#include <asm/timex.h> 22#include <asm/timex.h>
22#include <asm/hpet.h> 23#include <asm/hpet.h>
@@ -25,6 +26,43 @@
25 26
26#define gtod (&VVAR(vsyscall_gtod_data)) 27#define gtod (&VVAR(vsyscall_gtod_data))
27 28
29notrace static cycle_t vread_tsc(void)
30{
31 cycle_t ret;
32 u64 last;
33
34 /*
35 * Empirically, a fence (of type that depends on the CPU)
36 * before rdtsc is enough to ensure that rdtsc is ordered
37 * with respect to loads. The various CPU manuals are unclear
38 * as to whether rdtsc can be reordered with later loads,
39 * but no one has ever seen it happen.
40 */
41 rdtsc_barrier();
42 ret = (cycle_t)vget_cycles();
43
44 last = VVAR(vsyscall_gtod_data).clock.cycle_last;
45
46 if (likely(ret >= last))
47 return ret;
48
49 /*
50 * GCC likes to generate cmov here, but this branch is extremely
51 * predictable (it's just a function of time and the likely is
52 * very likely) and there's a data dependence, so force GCC
53 * to generate a branch instead. I don't barrier() because
54 * we don't actually need a barrier, and if this function
55 * ever gets inlined it will generate worse code.
56 */
57 asm volatile ("");
58 return last;
59}
60
61static notrace cycle_t vread_hpet(void)
62{
63 return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0);
64}
65
28notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) 66notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
29{ 67{
30 long ret; 68 long ret;
@@ -36,9 +74,12 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
36notrace static inline long vgetns(void) 74notrace static inline long vgetns(void)
37{ 75{
38 long v; 76 long v;
39 cycles_t (*vread)(void); 77 cycles_t cycles;
40 vread = gtod->clock.vread; 78 if (gtod->clock.vclock_mode == VCLOCK_TSC)
41 v = (vread() - gtod->clock.cycle_last) & gtod->clock.mask; 79 cycles = vread_tsc();
80 else
81 cycles = vread_hpet();
82 v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
42 return (v * gtod->clock.mult) >> gtod->clock.shift; 83 return (v * gtod->clock.mult) >> gtod->clock.shift;
43} 84}
44 85
@@ -118,11 +159,11 @@ notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
118{ 159{
119 switch (clock) { 160 switch (clock) {
120 case CLOCK_REALTIME: 161 case CLOCK_REALTIME:
121 if (likely(gtod->clock.vread)) 162 if (likely(gtod->clock.vclock_mode != VCLOCK_NONE))
122 return do_realtime(ts); 163 return do_realtime(ts);
123 break; 164 break;
124 case CLOCK_MONOTONIC: 165 case CLOCK_MONOTONIC:
125 if (likely(gtod->clock.vread)) 166 if (likely(gtod->clock.vclock_mode != VCLOCK_NONE))
126 return do_monotonic(ts); 167 return do_monotonic(ts);
127 break; 168 break;
128 case CLOCK_REALTIME_COARSE: 169 case CLOCK_REALTIME_COARSE:
@@ -139,7 +180,7 @@ int clock_gettime(clockid_t, struct timespec *)
139notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) 180notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
140{ 181{
141 long ret; 182 long ret;
142 if (likely(gtod->clock.vread)) { 183 if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) {
143 if (likely(tv != NULL)) { 184 if (likely(tv != NULL)) {
144 BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != 185 BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
145 offsetof(struct timespec, tv_nsec) || 186 offsetof(struct timespec, tv_nsec) ||