diff options
author | Andy Lutomirski <luto@MIT.EDU> | 2011-06-05 13:50:24 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-06-07 04:02:35 -0400 |
commit | 5cec93c216db77c45f7ce970d46283bcb1933884 (patch) | |
tree | 1dffa866471cd3f4ad1a342e389d3a14cb87d05f /arch/x86/kernel | |
parent | 5dfcea629a08b4684a019cd0cb59d0c9129a6c02 (diff) |
x86-64: Emulate legacy vsyscalls
There's a fair amount of code in the vsyscall page. It contains
a syscall instruction (in the gettimeofday fallback) and who
knows what will happen if an exploit jumps into the middle of
some other code.
Reduce the risk by replacing the vsyscalls with short magic
incantations that cause the kernel to emulate the real
vsyscalls. These incantations are useless if entered in the
middle.
This causes vsyscalls to be a little more expensive than real
syscalls. Fortunately, sensible programs don't use them.
The only exception is time(), which is still called by glibc
through the vsyscall - but calling time() millions of times
per second is not sensible. glibc has this fixed in its
development tree.
This patch is not perfect: the vread_tsc and vread_hpet
functions are still at a fixed address. Fixing that might
involve making alternative patching work in the vDSO.
Signed-off-by: Andy Lutomirski <luto@mit.edu>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jesper Juhl <jj@chaosbits.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Jan Beulich <JBeulich@novell.com>
Cc: richard -rw- weinberger <richard.weinberger@gmail.com>
Cc: Mikael Pettersson <mikpe@it.uu.se>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Louis Rilling <Louis.Rilling@kerlabs.com>
Cc: Valdis.Kletnieks@vt.edu
Cc: pageexec@freemail.hu
Link: http://lkml.kernel.org/r/e64e1b3c64858820d12c48fa739efbd1485e79d5.1307292171.git.luto@mit.edu
[ Removed the CONFIG option - it's simpler to just do it unconditionally. Tidied up the code as well. ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 2 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/vsyscall_64.c | 261 | ||||
-rw-r--r-- | arch/x86/kernel/vsyscall_emu_64.S | 27 |
5 files changed, 158 insertions, 139 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 90b06d4daee2..cc0469a65120 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -44,6 +44,7 @@ obj-y += probe_roms.o | |||
44 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o | 44 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o |
45 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o | 45 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o |
46 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o vread_tsc_64.o | 46 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o vread_tsc_64.o |
47 | obj-$(CONFIG_X86_64) += vsyscall_emu_64.o | ||
47 | obj-y += bootflag.o e820.o | 48 | obj-y += bootflag.o e820.o |
48 | obj-y += pci-dma.o quirks.o topology.o kdebugfs.o | 49 | obj-y += pci-dma.o quirks.o topology.o kdebugfs.o |
49 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o | 50 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 72c4a777bb91..e949793d6b93 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -1123,6 +1123,8 @@ zeroentry spurious_interrupt_bug do_spurious_interrupt_bug | |||
1123 | zeroentry coprocessor_error do_coprocessor_error | 1123 | zeroentry coprocessor_error do_coprocessor_error |
1124 | errorentry alignment_check do_alignment_check | 1124 | errorentry alignment_check do_alignment_check |
1125 | zeroentry simd_coprocessor_error do_simd_coprocessor_error | 1125 | zeroentry simd_coprocessor_error do_simd_coprocessor_error |
1126 | zeroentry emulate_vsyscall do_emulate_vsyscall | ||
1127 | |||
1126 | 1128 | ||
1127 | /* Reload gs selector with exception handling */ | 1129 | /* Reload gs selector with exception handling */ |
1128 | /* edi: new selector */ | 1130 | /* edi: new selector */ |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b9b67166f9de..fbc097a085ca 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -872,6 +872,12 @@ void __init trap_init(void) | |||
872 | set_bit(SYSCALL_VECTOR, used_vectors); | 872 | set_bit(SYSCALL_VECTOR, used_vectors); |
873 | #endif | 873 | #endif |
874 | 874 | ||
875 | #ifdef CONFIG_X86_64 | ||
876 | BUG_ON(test_bit(VSYSCALL_EMU_VECTOR, used_vectors)); | ||
877 | set_system_intr_gate(VSYSCALL_EMU_VECTOR, &emulate_vsyscall); | ||
878 | set_bit(VSYSCALL_EMU_VECTOR, used_vectors); | ||
879 | #endif | ||
880 | |||
875 | /* | 881 | /* |
876 | * Should be a barrier for any external CPU state: | 882 | * Should be a barrier for any external CPU state: |
877 | */ | 883 | */ |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 70a5f6eebd6c..10cd8ac3395a 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -2,6 +2,8 @@ | |||
2 | * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE | 2 | * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE |
3 | * Copyright 2003 Andi Kleen, SuSE Labs. | 3 | * Copyright 2003 Andi Kleen, SuSE Labs. |
4 | * | 4 | * |
5 | * [ NOTE: this mechanism is now deprecated in favor of the vDSO. ] | ||
6 | * | ||
5 | * Thanks to hpa@transmeta.com for some useful hint. | 7 | * Thanks to hpa@transmeta.com for some useful hint. |
6 | * Special thanks to Ingo Molnar for his early experience with | 8 | * Special thanks to Ingo Molnar for his early experience with |
7 | * a different vsyscall implementation for Linux/IA32 and for the name. | 9 | * a different vsyscall implementation for Linux/IA32 and for the name. |
@@ -11,10 +13,9 @@ | |||
11 | * vsyscalls. One vsyscall can reserve more than 1 slot to avoid | 13 | * vsyscalls. One vsyscall can reserve more than 1 slot to avoid |
12 | * jumping out of line if necessary. We cannot add more with this | 14 | * jumping out of line if necessary. We cannot add more with this |
13 | * mechanism because older kernels won't return -ENOSYS. | 15 | * mechanism because older kernels won't return -ENOSYS. |
14 | * If we want more than four we need a vDSO. | ||
15 | * | 16 | * |
16 | * Note: the concept clashes with user mode linux. If you use UML and | 17 | * Note: the concept clashes with user mode linux. UML users should |
17 | * want per guest time just set the kernel.vsyscall64 sysctl to 0. | 18 | * use the vDSO. |
18 | */ | 19 | */ |
19 | 20 | ||
20 | /* Disable profiling for userspace code: */ | 21 | /* Disable profiling for userspace code: */ |
@@ -32,6 +33,8 @@ | |||
32 | #include <linux/cpu.h> | 33 | #include <linux/cpu.h> |
33 | #include <linux/smp.h> | 34 | #include <linux/smp.h> |
34 | #include <linux/notifier.h> | 35 | #include <linux/notifier.h> |
36 | #include <linux/syscalls.h> | ||
37 | #include <linux/ratelimit.h> | ||
35 | 38 | ||
36 | #include <asm/vsyscall.h> | 39 | #include <asm/vsyscall.h> |
37 | #include <asm/pgtable.h> | 40 | #include <asm/pgtable.h> |
@@ -44,10 +47,7 @@ | |||
44 | #include <asm/desc.h> | 47 | #include <asm/desc.h> |
45 | #include <asm/topology.h> | 48 | #include <asm/topology.h> |
46 | #include <asm/vgtod.h> | 49 | #include <asm/vgtod.h> |
47 | 50 | #include <asm/traps.h> | |
48 | #define __vsyscall(nr) \ | ||
49 | __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace | ||
50 | #define __syscall_clobber "r11","cx","memory" | ||
51 | 51 | ||
52 | DEFINE_VVAR(int, vgetcpu_mode); | 52 | DEFINE_VVAR(int, vgetcpu_mode); |
53 | DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = | 53 | DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = |
@@ -71,146 +71,129 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, | |||
71 | unsigned long flags; | 71 | unsigned long flags; |
72 | 72 | ||
73 | write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); | 73 | write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); |
74 | |||
74 | /* copy vsyscall data */ | 75 | /* copy vsyscall data */ |
75 | vsyscall_gtod_data.clock.vread = clock->vread; | 76 | vsyscall_gtod_data.clock.vread = clock->vread; |
76 | vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; | 77 | vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; |
77 | vsyscall_gtod_data.clock.mask = clock->mask; | 78 | vsyscall_gtod_data.clock.mask = clock->mask; |
78 | vsyscall_gtod_data.clock.mult = mult; | 79 | vsyscall_gtod_data.clock.mult = mult; |
79 | vsyscall_gtod_data.clock.shift = clock->shift; | 80 | vsyscall_gtod_data.clock.shift = clock->shift; |
80 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; | 81 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; |
81 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; | 82 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; |
82 | vsyscall_gtod_data.wall_to_monotonic = *wtm; | 83 | vsyscall_gtod_data.wall_to_monotonic = *wtm; |
83 | vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); | 84 | vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); |
85 | |||
84 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); | 86 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); |
85 | } | 87 | } |
86 | 88 | ||
87 | /* RED-PEN may want to readd seq locking, but then the variable should be | 89 | static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, |
88 | * write-once. | 90 | const char *message) |
89 | */ | ||
90 | static __always_inline void do_get_tz(struct timezone * tz) | ||
91 | { | 91 | { |
92 | *tz = VVAR(vsyscall_gtod_data).sys_tz; | 92 | static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); |
93 | } | 93 | struct task_struct *tsk; |
94 | 94 | ||
95 | static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) | 95 | if (!show_unhandled_signals || !__ratelimit(&rs)) |
96 | { | 96 | return; |
97 | int ret; | ||
98 | asm volatile("syscall" | ||
99 | : "=a" (ret) | ||
100 | : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) | ||
101 | : __syscall_clobber ); | ||
102 | return ret; | ||
103 | } | ||
104 | 97 | ||
105 | static __always_inline void do_vgettimeofday(struct timeval * tv) | 98 | tsk = current; |
106 | { | ||
107 | cycle_t now, base, mask, cycle_delta; | ||
108 | unsigned seq; | ||
109 | unsigned long mult, shift, nsec; | ||
110 | cycle_t (*vread)(void); | ||
111 | do { | ||
112 | seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock); | ||
113 | |||
114 | vread = VVAR(vsyscall_gtod_data).clock.vread; | ||
115 | if (unlikely(!vread)) { | ||
116 | gettimeofday(tv,NULL); | ||
117 | return; | ||
118 | } | ||
119 | |||
120 | now = vread(); | ||
121 | base = VVAR(vsyscall_gtod_data).clock.cycle_last; | ||
122 | mask = VVAR(vsyscall_gtod_data).clock.mask; | ||
123 | mult = VVAR(vsyscall_gtod_data).clock.mult; | ||
124 | shift = VVAR(vsyscall_gtod_data).clock.shift; | ||
125 | |||
126 | tv->tv_sec = VVAR(vsyscall_gtod_data).wall_time_sec; | ||
127 | nsec = VVAR(vsyscall_gtod_data).wall_time_nsec; | ||
128 | } while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq)); | ||
129 | |||
130 | /* calculate interval: */ | ||
131 | cycle_delta = (now - base) & mask; | ||
132 | /* convert to nsecs: */ | ||
133 | nsec += (cycle_delta * mult) >> shift; | ||
134 | |||
135 | while (nsec >= NSEC_PER_SEC) { | ||
136 | tv->tv_sec += 1; | ||
137 | nsec -= NSEC_PER_SEC; | ||
138 | } | ||
139 | tv->tv_usec = nsec / NSEC_PER_USEC; | ||
140 | } | ||
141 | 99 | ||
142 | int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) | 100 | printk("%s%s[%d] %s ip:%lx sp:%lx ax:%lx si:%lx di:%lx\n", |
143 | { | 101 | level, tsk->comm, task_pid_nr(tsk), |
144 | if (tv) | 102 | message, regs->ip - 2, regs->sp, regs->ax, regs->si, regs->di); |
145 | do_vgettimeofday(tv); | ||
146 | if (tz) | ||
147 | do_get_tz(tz); | ||
148 | return 0; | ||
149 | } | 103 | } |
150 | 104 | ||
151 | /* This will break when the xtime seconds get inaccurate, but that is | 105 | void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code) |
152 | * unlikely */ | ||
153 | time_t __vsyscall(1) vtime(time_t *t) | ||
154 | { | 106 | { |
155 | unsigned seq; | 107 | const char *vsyscall_name; |
156 | time_t result; | 108 | struct task_struct *tsk; |
109 | unsigned long caller; | ||
110 | int vsyscall_nr; | ||
111 | long ret; | ||
112 | |||
113 | /* Kernel code must never get here. */ | ||
114 | BUG_ON(!user_mode(regs)); | ||
115 | |||
116 | local_irq_enable(); | ||
117 | |||
118 | /* | ||
119 | * x86-ism here: regs->ip points to the instruction after the int 0xcc, | ||
120 | * and int 0xcc is two bytes long. | ||
121 | */ | ||
122 | if (!is_vsyscall_entry(regs->ip - 2)) { | ||
123 | warn_bad_vsyscall(KERN_WARNING, regs, "illegal int 0xcc (exploit attempt?)"); | ||
124 | goto sigsegv; | ||
125 | } | ||
126 | vsyscall_nr = vsyscall_entry_nr(regs->ip - 2); | ||
157 | 127 | ||
158 | do { | 128 | if (get_user(caller, (unsigned long __user *)regs->sp) != 0) { |
159 | seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock); | 129 | warn_bad_vsyscall(KERN_WARNING, regs, "int 0xcc with bad stack (exploit attempt?)"); |
130 | goto sigsegv; | ||
131 | } | ||
160 | 132 | ||
161 | result = VVAR(vsyscall_gtod_data).wall_time_sec; | 133 | tsk = current; |
134 | if (seccomp_mode(&tsk->seccomp)) | ||
135 | do_exit(SIGKILL); | ||
136 | |||
137 | switch (vsyscall_nr) { | ||
138 | case 0: | ||
139 | vsyscall_name = "gettimeofday"; | ||
140 | ret = sys_gettimeofday( | ||
141 | (struct timeval __user *)regs->di, | ||
142 | (struct timezone __user *)regs->si); | ||
143 | break; | ||
144 | |||
145 | case 1: | ||
146 | vsyscall_name = "time"; | ||
147 | ret = sys_time((time_t __user *)regs->di); | ||
148 | break; | ||
149 | |||
150 | case 2: | ||
151 | vsyscall_name = "getcpu"; | ||
152 | ret = sys_getcpu((unsigned __user *)regs->di, | ||
153 | (unsigned __user *)regs->si, | ||
154 | 0); | ||
155 | break; | ||
156 | |||
157 | default: | ||
158 | /* | ||
159 | * If we get here, then vsyscall_nr indicates that int 0xcc | ||
160 | * happened at an address in the vsyscall page that doesn't | ||
161 | * contain int 0xcc. That can't happen. | ||
162 | */ | ||
163 | BUG(); | ||
164 | } | ||
162 | 165 | ||
163 | } while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq)); | 166 | if (ret == -EFAULT) { |
167 | /* | ||
168 | * Bad news -- userspace fed a bad pointer to a vsyscall. | ||
169 | * | ||
170 | * With a real vsyscall, that would have caused SIGSEGV. | ||
171 | * To make writing reliable exploits using the emulated | ||
172 | * vsyscalls harder, generate SIGSEGV here as well. | ||
173 | */ | ||
174 | warn_bad_vsyscall(KERN_INFO, regs, | ||
175 | "vsyscall fault (exploit attempt?)"); | ||
176 | goto sigsegv; | ||
177 | } | ||
164 | 178 | ||
165 | if (t) | 179 | regs->ax = ret; |
166 | *t = result; | ||
167 | return result; | ||
168 | } | ||
169 | 180 | ||
170 | /* Fast way to get current CPU and node. | 181 | /* Emulate a ret instruction. */ |
171 | This helps to do per node and per CPU caches in user space. | 182 | regs->ip = caller; |
172 | The result is not guaranteed without CPU affinity, but usually | 183 | regs->sp += 8; |
173 | works out because the scheduler tries to keep a thread on the same | ||
174 | CPU. | ||
175 | 184 | ||
176 | tcache must point to a two element sized long array. | 185 | local_irq_disable(); |
177 | All arguments can be NULL. */ | 186 | return; |
178 | long __vsyscall(2) | 187 | |
179 | vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) | 188 | sigsegv: |
180 | { | 189 | regs->ip -= 2; /* The faulting instruction should be the int 0xcc. */ |
181 | unsigned int p; | 190 | force_sig(SIGSEGV, current); |
182 | unsigned long j = 0; | ||
183 | |||
184 | /* Fast cache - only recompute value once per jiffies and avoid | ||
185 | relatively costly rdtscp/cpuid otherwise. | ||
186 | This works because the scheduler usually keeps the process | ||
187 | on the same CPU and this syscall doesn't guarantee its | ||
188 | results anyways. | ||
189 | We do this here because otherwise user space would do it on | ||
190 | its own in a likely inferior way (no access to jiffies). | ||
191 | If you don't like it pass NULL. */ | ||
192 | if (tcache && tcache->blob[0] == (j = VVAR(jiffies))) { | ||
193 | p = tcache->blob[1]; | ||
194 | } else if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) { | ||
195 | /* Load per CPU data from RDTSCP */ | ||
196 | native_read_tscp(&p); | ||
197 | } else { | ||
198 | /* Load per CPU data from GDT */ | ||
199 | asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); | ||
200 | } | ||
201 | if (tcache) { | ||
202 | tcache->blob[0] = j; | ||
203 | tcache->blob[1] = p; | ||
204 | } | ||
205 | if (cpu) | ||
206 | *cpu = p & 0xfff; | ||
207 | if (node) | ||
208 | *node = p >> 12; | ||
209 | return 0; | ||
210 | } | 191 | } |
211 | 192 | ||
212 | /* Assume __initcall executes before all user space. Hopefully kmod | 193 | /* |
213 | doesn't violate that. We'll find out if it does. */ | 194 | * Assume __initcall executes before all user space. Hopefully kmod |
195 | * doesn't violate that. We'll find out if it does. | ||
196 | */ | ||
214 | static void __cpuinit vsyscall_set_cpu(int cpu) | 197 | static void __cpuinit vsyscall_set_cpu(int cpu) |
215 | { | 198 | { |
216 | unsigned long d; | 199 | unsigned long d; |
@@ -221,13 +204,15 @@ static void __cpuinit vsyscall_set_cpu(int cpu) | |||
221 | if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP)) | 204 | if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP)) |
222 | write_rdtscp_aux((node << 12) | cpu); | 205 | write_rdtscp_aux((node << 12) | cpu); |
223 | 206 | ||
224 | /* Store cpu number in limit so that it can be loaded quickly | 207 | /* |
225 | in user space in vgetcpu. | 208 | * Store cpu number in limit so that it can be loaded quickly |
226 | 12 bits for the CPU and 8 bits for the node. */ | 209 | * in user space in vgetcpu. (12 bits for the CPU and 8 bits for the node) |
210 | */ | ||
227 | d = 0x0f40000000000ULL; | 211 | d = 0x0f40000000000ULL; |
228 | d |= cpu; | 212 | d |= cpu; |
229 | d |= (node & 0xf) << 12; | 213 | d |= (node & 0xf) << 12; |
230 | d |= (node >> 4) << 48; | 214 | d |= (node >> 4) << 48; |
215 | |||
231 | write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S); | 216 | write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S); |
232 | } | 217 | } |
233 | 218 | ||
@@ -241,8 +226,10 @@ static int __cpuinit | |||
241 | cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) | 226 | cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) |
242 | { | 227 | { |
243 | long cpu = (long)arg; | 228 | long cpu = (long)arg; |
229 | |||
244 | if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) | 230 | if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) |
245 | smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1); | 231 | smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1); |
232 | |||
246 | return NOTIFY_DONE; | 233 | return NOTIFY_DONE; |
247 | } | 234 | } |
248 | 235 | ||
@@ -256,21 +243,17 @@ void __init map_vsyscall(void) | |||
256 | /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */ | 243 | /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */ |
257 | __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); | 244 | __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); |
258 | __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR); | 245 | __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR); |
259 | BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != | 246 | BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != (unsigned long)VVAR_ADDRESS); |
260 | (unsigned long)VVAR_ADDRESS); | ||
261 | } | 247 | } |
262 | 248 | ||
263 | static int __init vsyscall_init(void) | 249 | static int __init vsyscall_init(void) |
264 | { | 250 | { |
265 | BUG_ON(((unsigned long) &vgettimeofday != | 251 | BUG_ON(VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)); |
266 | VSYSCALL_ADDR(__NR_vgettimeofday))); | 252 | |
267 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); | ||
268 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); | ||
269 | BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu)); | ||
270 | on_each_cpu(cpu_vsyscall_init, NULL, 1); | 253 | on_each_cpu(cpu_vsyscall_init, NULL, 1); |
271 | /* notifier priority > KVM */ | 254 | /* notifier priority > KVM */ |
272 | hotcpu_notifier(cpu_vsyscall_notifier, 30); | 255 | hotcpu_notifier(cpu_vsyscall_notifier, 30); |
256 | |||
273 | return 0; | 257 | return 0; |
274 | } | 258 | } |
275 | |||
276 | __initcall(vsyscall_init); | 259 | __initcall(vsyscall_init); |
diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/kernel/vsyscall_emu_64.S new file mode 100644 index 000000000000..ffa845eae5ca --- /dev/null +++ b/arch/x86/kernel/vsyscall_emu_64.S | |||
@@ -0,0 +1,27 @@ | |||
1 | /* | ||
2 | * vsyscall_emu_64.S: Vsyscall emulation page | ||
3 | * | ||
4 | * Copyright (c) 2011 Andy Lutomirski | ||
5 | * | ||
6 | * Subject to the GNU General Public License, version 2 | ||
7 | */ | ||
8 | |||
9 | #include <linux/linkage.h> | ||
10 | #include <asm/irq_vectors.h> | ||
11 | |||
12 | /* The unused parts of the page are filled with 0xcc by the linker script. */ | ||
13 | |||
14 | .section .vsyscall_0, "a" | ||
15 | ENTRY(vsyscall_0) | ||
16 | int $VSYSCALL_EMU_VECTOR | ||
17 | END(vsyscall_0) | ||
18 | |||
19 | .section .vsyscall_1, "a" | ||
20 | ENTRY(vsyscall_1) | ||
21 | int $VSYSCALL_EMU_VECTOR | ||
22 | END(vsyscall_1) | ||
23 | |||
24 | .section .vsyscall_2, "a" | ||
25 | ENTRY(vsyscall_2) | ||
26 | int $VSYSCALL_EMU_VECTOR | ||
27 | END(vsyscall_2) | ||