diff options
author | Andy Lutomirski <luto@MIT.EDU> | 2011-06-05 13:50:20 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-06-05 15:30:33 -0400 |
commit | 0d7b8547fb67d5c2a7d954c56b3715b0e708be4a (patch) | |
tree | f2d8b6c9c06b751f1b2c1b0665e761ce8e28a358 /arch/x86 | |
parent | 9fd67b4ed0714ab718f1f9bd14c344af336a6df7 (diff) |
x86-64: Remove kernel.vsyscall64 sysctl
It's unnecessary overhead in code that's supposed to be highly
optimized. Removing it allows us to remove one of the two
syscall instructions in the vsyscall page.
The only sensible use for it is for UML (User-Mode Linux) users, and
it doesn't fully address inconsistent vsyscall results on UML. The real
fix for UML is to stop using vsyscalls entirely.
Signed-off-by: Andy Lutomirski <luto@mit.edu>
Cc: Jesper Juhl <jj@chaosbits.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Jan Beulich <JBeulich@novell.com>
Cc: richard -rw- weinberger <richard.weinberger@gmail.com>
Cc: Mikael Pettersson <mikpe@it.uu.se>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Louis Rilling <Louis.Rilling@kerlabs.com>
Cc: Valdis.Kletnieks@vt.edu
Cc: pageexec@freemail.hu
Link: http://lkml.kernel.org/r/973ae803fe76f712da4b2740e66dccf452d3b1e4.1307292171.git.luto@mit.edu
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/vgtod.h | 1 | ||||
-rw-r--r-- | arch/x86/kernel/vsyscall_64.c | 34 | ||||
-rw-r--r-- | arch/x86/vdso/vclock_gettime.c | 55 |
3 files changed, 22 insertions, 68 deletions
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 646b4c1ca695..aa5add855a91 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -11,7 +11,6 @@ struct vsyscall_gtod_data { | |||
11 | time_t wall_time_sec; | 11 | time_t wall_time_sec; |
12 | u32 wall_time_nsec; | 12 | u32 wall_time_nsec; |
13 | 13 | ||
14 | int sysctl_enabled; | ||
15 | struct timezone sys_tz; | 14 | struct timezone sys_tz; |
16 | struct { /* extract of a clocksource struct */ | 15 | struct { /* extract of a clocksource struct */ |
17 | cycle_t (*vread)(void); | 16 | cycle_t (*vread)(void); |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 3cf1cef75a6a..9b2f3f51bc91 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -53,7 +53,6 @@ DEFINE_VVAR(int, vgetcpu_mode); | |||
53 | DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = | 53 | DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = |
54 | { | 54 | { |
55 | .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), | 55 | .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), |
56 | .sysctl_enabled = 1, | ||
57 | }; | 56 | }; |
58 | 57 | ||
59 | void update_vsyscall_tz(void) | 58 | void update_vsyscall_tz(void) |
@@ -103,15 +102,6 @@ static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) | |||
103 | return ret; | 102 | return ret; |
104 | } | 103 | } |
105 | 104 | ||
106 | static __always_inline long time_syscall(long *t) | ||
107 | { | ||
108 | long secs; | ||
109 | asm volatile("syscall" | ||
110 | : "=a" (secs) | ||
111 | : "0" (__NR_time),"D" (t) : __syscall_clobber); | ||
112 | return secs; | ||
113 | } | ||
114 | |||
115 | static __always_inline void do_vgettimeofday(struct timeval * tv) | 105 | static __always_inline void do_vgettimeofday(struct timeval * tv) |
116 | { | 106 | { |
117 | cycle_t now, base, mask, cycle_delta; | 107 | cycle_t now, base, mask, cycle_delta; |
@@ -122,8 +112,7 @@ static __always_inline void do_vgettimeofday(struct timeval * tv) | |||
122 | seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock); | 112 | seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock); |
123 | 113 | ||
124 | vread = VVAR(vsyscall_gtod_data).clock.vread; | 114 | vread = VVAR(vsyscall_gtod_data).clock.vread; |
125 | if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled || | 115 | if (unlikely(!vread)) { |
126 | !vread)) { | ||
127 | gettimeofday(tv,NULL); | 116 | gettimeofday(tv,NULL); |
128 | return; | 117 | return; |
129 | } | 118 | } |
@@ -165,8 +154,6 @@ time_t __vsyscall(1) vtime(time_t *t) | |||
165 | { | 154 | { |
166 | unsigned seq; | 155 | unsigned seq; |
167 | time_t result; | 156 | time_t result; |
168 | if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled)) | ||
169 | return time_syscall(t); | ||
170 | 157 | ||
171 | do { | 158 | do { |
172 | seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock); | 159 | seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock); |
@@ -227,22 +214,6 @@ static long __vsyscall(3) venosys_1(void) | |||
227 | return -ENOSYS; | 214 | return -ENOSYS; |
228 | } | 215 | } |
229 | 216 | ||
230 | #ifdef CONFIG_SYSCTL | ||
231 | static ctl_table kernel_table2[] = { | ||
232 | { .procname = "vsyscall64", | ||
233 | .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), | ||
234 | .mode = 0644, | ||
235 | .proc_handler = proc_dointvec }, | ||
236 | {} | ||
237 | }; | ||
238 | |||
239 | static ctl_table kernel_root_table2[] = { | ||
240 | { .procname = "kernel", .mode = 0555, | ||
241 | .child = kernel_table2 }, | ||
242 | {} | ||
243 | }; | ||
244 | #endif | ||
245 | |||
246 | /* Assume __initcall executes before all user space. Hopefully kmod | 217 | /* Assume __initcall executes before all user space. Hopefully kmod |
247 | doesn't violate that. We'll find out if it does. */ | 218 | doesn't violate that. We'll find out if it does. */ |
248 | static void __cpuinit vsyscall_set_cpu(int cpu) | 219 | static void __cpuinit vsyscall_set_cpu(int cpu) |
@@ -301,9 +272,6 @@ static int __init vsyscall_init(void) | |||
301 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); | 272 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); |
302 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); | 273 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); |
303 | BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu)); | 274 | BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu)); |
304 | #ifdef CONFIG_SYSCTL | ||
305 | register_sysctl_table(kernel_root_table2); | ||
306 | #endif | ||
307 | on_each_cpu(cpu_vsyscall_init, NULL, 1); | 275 | on_each_cpu(cpu_vsyscall_init, NULL, 1); |
308 | /* notifier priority > KVM */ | 276 | /* notifier priority > KVM */ |
309 | hotcpu_notifier(cpu_vsyscall_notifier, 30); | 277 | hotcpu_notifier(cpu_vsyscall_notifier, 30); |
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index a724905fdae7..cf54813ac527 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -116,21 +116,21 @@ notrace static noinline int do_monotonic_coarse(struct timespec *ts) | |||
116 | 116 | ||
117 | notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) | 117 | notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) |
118 | { | 118 | { |
119 | if (likely(gtod->sysctl_enabled)) | 119 | switch (clock) { |
120 | switch (clock) { | 120 | case CLOCK_REALTIME: |
121 | case CLOCK_REALTIME: | 121 | if (likely(gtod->clock.vread)) |
122 | if (likely(gtod->clock.vread)) | 122 | return do_realtime(ts); |
123 | return do_realtime(ts); | 123 | break; |
124 | break; | 124 | case CLOCK_MONOTONIC: |
125 | case CLOCK_MONOTONIC: | 125 | if (likely(gtod->clock.vread)) |
126 | if (likely(gtod->clock.vread)) | 126 | return do_monotonic(ts); |
127 | return do_monotonic(ts); | 127 | break; |
128 | break; | 128 | case CLOCK_REALTIME_COARSE: |
129 | case CLOCK_REALTIME_COARSE: | 129 | return do_realtime_coarse(ts); |
130 | return do_realtime_coarse(ts); | 130 | case CLOCK_MONOTONIC_COARSE: |
131 | case CLOCK_MONOTONIC_COARSE: | 131 | return do_monotonic_coarse(ts); |
132 | return do_monotonic_coarse(ts); | 132 | } |
133 | } | 133 | |
134 | return vdso_fallback_gettime(clock, ts); | 134 | return vdso_fallback_gettime(clock, ts); |
135 | } | 135 | } |
136 | int clock_gettime(clockid_t, struct timespec *) | 136 | int clock_gettime(clockid_t, struct timespec *) |
@@ -139,7 +139,7 @@ int clock_gettime(clockid_t, struct timespec *) | |||
139 | notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) | 139 | notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) |
140 | { | 140 | { |
141 | long ret; | 141 | long ret; |
142 | if (likely(gtod->sysctl_enabled && gtod->clock.vread)) { | 142 | if (likely(gtod->clock.vread)) { |
143 | if (likely(tv != NULL)) { | 143 | if (likely(tv != NULL)) { |
144 | BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != | 144 | BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != |
145 | offsetof(struct timespec, tv_nsec) || | 145 | offsetof(struct timespec, tv_nsec) || |
@@ -161,27 +161,14 @@ notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) | |||
161 | int gettimeofday(struct timeval *, struct timezone *) | 161 | int gettimeofday(struct timeval *, struct timezone *) |
162 | __attribute__((weak, alias("__vdso_gettimeofday"))); | 162 | __attribute__((weak, alias("__vdso_gettimeofday"))); |
163 | 163 | ||
164 | /* This will break when the xtime seconds get inaccurate, but that is | 164 | /* |
165 | * unlikely */ | 165 | * This will break when the xtime seconds get inaccurate, but that is |
166 | 166 | * unlikely | |
167 | static __always_inline long time_syscall(long *t) | 167 | */ |
168 | { | ||
169 | long secs; | ||
170 | asm volatile("syscall" | ||
171 | : "=a" (secs) | ||
172 | : "0" (__NR_time), "D" (t) : "cc", "r11", "cx", "memory"); | ||
173 | return secs; | ||
174 | } | ||
175 | |||
176 | notrace time_t __vdso_time(time_t *t) | 168 | notrace time_t __vdso_time(time_t *t) |
177 | { | 169 | { |
178 | time_t result; | ||
179 | |||
180 | if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled)) | ||
181 | return time_syscall(t); | ||
182 | |||
183 | /* This is atomic on x86_64 so we don't need any locks. */ | 170 | /* This is atomic on x86_64 so we don't need any locks. */ |
184 | result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec); | 171 | time_t result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec); |
185 | 172 | ||
186 | if (t) | 173 | if (t) |
187 | *t = result; | 174 | *t = result; |