author     Andy Lutomirski <luto@MIT.EDU>  2011-05-23 09:31:24 -0400
committer  Thomas Gleixner <tglx@linutronix.de>  2011-05-24 08:51:28 -0400
commit     8c49d9a74bac5ea3f18480307057241b808fcc0c
tree       a9d2d8160ca37e9292e605cf3cdd85b29646c4b7
parent     d762f4383100c2a87b1a3f2d678cd3b5425655b4
x86-64: Clean up vdso/kernel shared variables
Variables that are shared between the vdso and the kernel are currently
a bit of a mess. They are each defined with their own magic, they are
accessed differently in the kernel, the vsyscall page, and the vdso,
and one of them (vsyscall_clock) doesn't even really exist.

This changes them all to use a common mechanism. All of them are
declared in vvar.h with a fixed address (validated by the linker
script). In the kernel (as before), they look like ordinary read-write
variables. In the vsyscall page and the vdso, they are accessed through
a new macro VVAR, which gives read-only access.

The vdso is now loaded verbatim into memory without any fixups. As a
side bonus, access from the vdso is faster because a level of
indirection is removed.

While we're at it, pack jiffies and vgetcpu_mode into the same
cacheline.

Signed-off-by: Andy Lutomirski <luto@mit.edu>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Borislav Petkov <bp@amd64.org>
Link: http://lkml.kernel.org/r/%3C7357882fbb51fa30491636a7b6528747301b7ee9.1306156808.git.luto%40mit.edu%3E
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
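For context, the message above describes the new mechanism only in prose.
The following is an illustrative sketch of the vvar.h pattern it refers to;
vvar.h itself lies outside this diffstat (which is limited to
arch/x86/kernel), so the exact offsets and spellings here are assumptions,
not the file's verbatim contents:

	/* Sketch of the vvar.h pattern -- names and offsets are assumptions. */
	#if defined(__VVAR_KERNEL_LDS)
	/* The kernel linker script places each variable via EMIT_VVAR. */
	#define DECLARE_VVAR(offset, type, name) EMIT_VVAR(name, offset)
	#else
	/* Kernel C code defines the variable in a per-variable section,
	 * where it behaves as an ordinary read-write variable. */
	#define DEFINE_VVAR(type, name) \
		type name __attribute__((section(".vsyscall_var_" #name)))

	/* The vsyscall page and the vdso read it through a fixed address,
	 * so VVAR(name) is read-only from their point of view. */
	#define DECLARE_VVAR(offset, type, name) \
		static type const * const vvaraddr_ ## name = \
			(void *)(VVAR_ADDRESS + (offset));
	#define VVAR(name) (*vvaraddr_ ## name)
	#endif

	/* Example declarations; offsets 0 and 8 would put jiffies and
	 * vgetcpu_mode in the same cacheline, as the message notes. */
	DECLARE_VVAR(0, volatile unsigned long, jiffies)
	DECLARE_VVAR(8, int, vgetcpu_mode)
	DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)

Because vvaraddr_<name> is a compile-time constant address, VVAR(name)
compiles to a direct load; that is the removed level of indirection the
message refers to.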
Diffstat (limited to 'arch/x86/kernel')
 arch/x86/kernel/time.c        |  2
 arch/x86/kernel/tsc.c         |  4
 arch/x86/kernel/vmlinux.lds.S | 34
 arch/x86/kernel/vsyscall_64.c | 46
 4 files changed, 34 insertions(+), 52 deletions(-)
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 25a28a245937..00cbb272627f 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -23,7 +23,7 @@
 #include <asm/time.h>
 
 #ifdef CONFIG_X86_64
-volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
+DEFINE_VVAR(volatile unsigned long, jiffies) = INITIAL_JIFFIES;
 #endif
 
 unsigned long profile_pc(struct pt_regs *regs)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 9335bf7dd2e7..db697b81b8b1 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -777,8 +777,8 @@ static cycle_t __vsyscall_fn vread_tsc(void)
 	ret = (cycle_t)vget_cycles();
 	rdtsc_barrier();
 
-	return ret >= __vsyscall_gtod_data.clock.cycle_last ?
-		ret : __vsyscall_gtod_data.clock.cycle_last;
+	return ret >= VVAR(vsyscall_gtod_data).clock.cycle_last ?
+		ret : VVAR(vsyscall_gtod_data).clock.cycle_last;
 }
 #endif
 
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 49927a863cc1..74b5ad450c7e 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -161,6 +161,12 @@ SECTIONS
 
 #define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
 #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
+#define EMIT_VVAR(x, offset) .vsyscall_var_ ## x	\
+	ADDR(.vsyscall_0) + offset			\
+	: AT(VLOAD(.vsyscall_var_ ## x)) {		\
+		*(.vsyscall_var_ ## x)			\
+	}						\
+	x = VVIRT(.vsyscall_var_ ## x);
 
 	. = ALIGN(4096);
 	__vsyscall_0 = .;
@@ -175,18 +181,6 @@ SECTIONS
 		*(.vsyscall_fn)
 	}
 
-	. = ALIGN(L1_CACHE_BYTES);
-	.vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) {
-		*(.vsyscall_gtod_data)
-	}
-
-	vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
-	.vsyscall_clock : AT(VLOAD(.vsyscall_clock)) {
-		*(.vsyscall_clock)
-	}
-	vsyscall_clock = VVIRT(.vsyscall_clock);
-
-
 	.vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) {
 		*(.vsyscall_1)
 	}
@@ -194,21 +188,14 @@ SECTIONS
 		*(.vsyscall_2)
 	}
 
-	.vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) {
-		*(.vgetcpu_mode)
-	}
-	vgetcpu_mode = VVIRT(.vgetcpu_mode);
-
-	. = ALIGN(L1_CACHE_BYTES);
-	.jiffies : AT(VLOAD(.jiffies)) {
-		*(.jiffies)
-	}
-	jiffies = VVIRT(.jiffies);
-
 	.vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) {
 		*(.vsyscall_3)
 	}
 
+#define __VVAR_KERNEL_LDS
+#include <asm/vvar.h>
+#undef __VVAR_KERNEL_LDS
+
 	. = __vsyscall_0 + PAGE_SIZE;
 
 #undef VSYSCALL_ADDR
@@ -216,6 +203,7 @@ SECTIONS
 #undef VLOAD
 #undef VVIRT_OFFSET
 #undef VVIRT
+#undef EMIT_VVAR
 
 #endif /* CONFIG_X86_64 */
 
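(Illustration: under __VVAR_KERNEL_LDS, each DECLARE_VVAR in asm/vvar.h
expands through the EMIT_VVAR macro defined at the top of this section, so
the #include above emits one output section per variable. Assuming jiffies
sits at offset 0 -- an assumption, since vvar.h is outside this diffstat --
EMIT_VVAR(jiffies, 0) would expand to roughly:

	/* Hypothetical expansion of EMIT_VVAR(jiffies, 0): */
	.vsyscall_var_jiffies ADDR(.vsyscall_0) + 0
	: AT(VLOAD(.vsyscall_var_jiffies)) {
		*(.vsyscall_var_jiffies)
	}
	jiffies = VVIRT(.vsyscall_var_jiffies);

Pinning each section at ADDR(.vsyscall_0) + offset is how the fixed
addresses get validated: if a variable outgrows its slot, the sections
overlap and the link fails.)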
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index dcbb28c4b694..5f6ad032575a 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -49,15 +49,8 @@
 	__attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
 #define __syscall_clobber "r11","cx","memory"
 
-/*
- * vsyscall_gtod_data contains data that is :
- * - readonly from vsyscalls
- * - written by timer interrupt or systcl (/proc/sys/kernel/vsyscall64)
- * Try to keep this structure as small as possible to avoid cache line ping pongs
- */
-int __vgetcpu_mode __section_vgetcpu_mode;
-
-struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data =
+DEFINE_VVAR(int, vgetcpu_mode);
+DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
 {
 	.lock = SEQLOCK_UNLOCKED,
 	.sysctl_enabled = 1,
@@ -97,7 +90,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
  */
 static __always_inline void do_get_tz(struct timezone * tz)
 {
-	*tz = __vsyscall_gtod_data.sys_tz;
+	*tz = VVAR(vsyscall_gtod_data).sys_tz;
 }
 
 static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
@@ -126,23 +119,24 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
 	unsigned long mult, shift, nsec;
 	cycle_t (*vread)(void);
 	do {
-		seq = read_seqbegin(&__vsyscall_gtod_data.lock);
+		seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock);
 
-		vread = __vsyscall_gtod_data.clock.vread;
-		if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) {
+		vread = VVAR(vsyscall_gtod_data).clock.vread;
+		if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled ||
+			     !vread)) {
 			gettimeofday(tv,NULL);
 			return;
 		}
 
 		now = vread();
-		base = __vsyscall_gtod_data.clock.cycle_last;
-		mask = __vsyscall_gtod_data.clock.mask;
-		mult = __vsyscall_gtod_data.clock.mult;
-		shift = __vsyscall_gtod_data.clock.shift;
+		base = VVAR(vsyscall_gtod_data).clock.cycle_last;
+		mask = VVAR(vsyscall_gtod_data).clock.mask;
+		mult = VVAR(vsyscall_gtod_data).clock.mult;
+		shift = VVAR(vsyscall_gtod_data).clock.shift;
 
-		tv->tv_sec = __vsyscall_gtod_data.wall_time_sec;
-		nsec = __vsyscall_gtod_data.wall_time_nsec;
-	} while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
+		tv->tv_sec = VVAR(vsyscall_gtod_data).wall_time_sec;
+		nsec = VVAR(vsyscall_gtod_data).wall_time_nsec;
+	} while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq));
 
 	/* calculate interval: */
 	cycle_delta = (now - base) & mask;
@@ -171,15 +165,15 @@ time_t __vsyscall(1) vtime(time_t *t)
 {
 	unsigned seq;
 	time_t result;
-	if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
+	if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled))
 		return time_syscall(t);
 
 	do {
-		seq = read_seqbegin(&__vsyscall_gtod_data.lock);
+		seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock);
 
-		result = __vsyscall_gtod_data.wall_time_sec;
+		result = VVAR(vsyscall_gtod_data).wall_time_sec;
 
-	} while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
+	} while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq));
 
 	if (t)
 		*t = result;
@@ -208,9 +202,9 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
 	   We do this here because otherwise user space would do it on
 	   its own in a likely inferior way (no access to jiffies).
 	   If you don't like it pass NULL. */
-	if (tcache && tcache->blob[0] == (j = __jiffies)) {
+	if (tcache && tcache->blob[0] == (j = VVAR(jiffies))) {
 		p = tcache->blob[1];
-	} else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
+	} else if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
 		/* Load per CPU data from RDTSCP */
 		native_read_tscp(&p);
 	} else {