author     Linus Torvalds <torvalds@linux-foundation.org>  2014-12-10 17:24:20 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2014-12-10 17:24:20 -0500
commit     3100e448e7d74489a96cb7b45d88fe6962774eaa (patch)
tree       53e46a702bd191ca43639b560d2bb1d3b0ad18c8 /arch
parent     c9f861c77269bc9950c16c6404a9476062241671 (diff)
parent     26893107aa717cd11010f0c278d02535defa1ac9 (diff)
Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 vdso updates from Ingo Molnar:
 "Various vDSO updates from Andy Lutomirski, mostly cleanups and
  reorganization to improve maintainability, but also some
  micro-optimizations and robustization changes"

* 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86_64/vsyscall: Restore orig_ax after vsyscall seccomp
  x86_64: Add a comment explaining the TASK_SIZE_MAX guard page
  x86_64,vsyscall: Make vsyscall emulation configurable
  x86_64, vsyscall: Rewrite comment and clean up headers in vsyscall code
  x86_64, vsyscall: Turn vsyscalls all the way off when vsyscall==none
  x86,vdso: Use LSL unconditionally for vgetcpu
  x86: vdso: Fix build with older gcc
  x86_64/vdso: Clean up vgetcpu init and merge the vdso initcalls
  x86_64/vdso: Remove jiffies from the vvar page
  x86/vdso: Make the PER_CPU segment 32 bits
  x86/vdso: Make the PER_CPU segment start out accessed
  x86/vdso: Change the PER_CPU segment to use struct desc_struct
  x86_64/vdso: Move getcpu code from vsyscall_64.c to vdso/vma.c
  x86_64/vsyscall: Move all of the gate_area code to vsyscall_64.c
Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/Kconfig                  |  18
-rw-r--r--  arch/x86/include/asm/fixmap.h     |   2
-rw-r--r--  arch/x86/include/asm/page_64.h    |   4
-rw-r--r--  arch/x86/include/asm/processor.h  |   8
-rw-r--r--  arch/x86/include/asm/vgtod.h      |  19
-rw-r--r--  arch/x86/include/asm/vsyscall.h   |  33
-rw-r--r--  arch/x86/include/asm/vvar.h       |   2
-rw-r--r--  arch/x86/kernel/Makefile          |   3
-rw-r--r--  arch/x86/kernel/cpu/common.c      |  10
-rw-r--r--  arch/x86/kernel/setup.c           |   2
-rw-r--r--  arch/x86/kernel/time.c            |   2
-rw-r--r--  arch/x86/kernel/vsyscall_64.c     | 147
-rw-r--r--  arch/x86/mm/init_64.c             |  49
-rw-r--r--  arch/x86/vdso/vgetcpu.c           |   2
-rw-r--r--  arch/x86/vdso/vma.c               |  83
-rw-r--r--  arch/x86/xen/mmu.c                |   6
16 files changed, 190 insertions, 200 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 666ac6651c17..bea3a0159496 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -992,6 +992,24 @@ config X86_ESPFIX64
 	def_bool y
 	depends on X86_16BIT && X86_64
 
+config X86_VSYSCALL_EMULATION
+	bool "Enable vsyscall emulation" if EXPERT
+	default y
+	depends on X86_64
+	---help---
+	 This enables emulation of the legacy vsyscall page.  Disabling
+	 it is roughly equivalent to booting with vsyscall=none, except
+	 that it will also disable the helpful warning if a program
+	 tries to use a vsyscall.  With this option set to N, offending
+	 programs will just segfault, citing addresses of the form
+	 0xffffffffff600?00.
+
+	 This option is required by many programs built before 2013, and
+	 care should be used even with newer programs if set to N.
+
+	 Disabling this option saves about 7K of kernel size and
+	 possibly 4K of additional runtime pagetable memory.
+
 config TOSHIBA
 	tristate "Toshiba Laptop support"
 	depends on X86_32
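For context, the sort of pre-2013 binary the help text is worried about behaves roughly like the sketch below (illustrative only, not part of this series): it calls the fixed vsyscall entry points directly instead of going through the vDSO. The 0xffffffffff600?00 addresses are the well-known legacy slots (gettimeofday at +0x0, time at +0x400, getcpu at +0x800); the program itself is made up for illustration.

#include <stdio.h>
#include <sys/time.h>

/* Hypothetical example of a legacy vsyscall caller; not from the kernel tree. */
typedef long (*vsys_gettimeofday_t)(struct timeval *tv, struct timezone *tz);

int main(void)
{
        /* Legacy gettimeofday vsyscall slot (0xffffffffff600000). */
        vsys_gettimeofday_t vgtod = (vsys_gettimeofday_t)0xffffffffff600000UL;
        struct timeval tv;

        if (vgtod(&tv, NULL) == 0)
                printf("%ld.%06ld\n", (long)tv.tv_sec, (long)tv.tv_usec);
        return 0;
}

With CONFIG_X86_VSYSCALL_EMULATION=y the call above is trapped and emulated (or runs natively with vsyscall=native); with the option set to N it simply faults at the cited address, which is exactly the segfault the help text describes.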
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index bf728e49c53c..f80d70009ff8 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -69,7 +69,9 @@ enum fixed_addresses {
 #ifdef CONFIG_X86_32
 	FIX_HOLE,
 #else
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
 	VSYSCALL_PAGE = (FIXADDR_TOP - VSYSCALL_ADDR) >> PAGE_SHIFT,
+#endif
 #ifdef CONFIG_PARAVIRT_CLOCK
 	PVCLOCK_FIXMAP_BEGIN,
 	PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index f408caf73430..b3bebf9e5746 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -39,6 +39,8 @@ void copy_page(void *to, void *from);
 
 #endif	/* !__ASSEMBLY__ */
 
-#define __HAVE_ARCH_GATE_AREA 1
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
+# define __HAVE_ARCH_GATE_AREA 1
+#endif
 
 #endif /* _ASM_X86_PAGE_64_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 25b8de0f21c0..a092a0cce0b7 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -894,7 +894,13 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
 
 #else
 /*
- * User space process size. 47bits minus one guard page.
+ * User space process size. 47bits minus one guard page. The guard
+ * page is necessary on Intel CPUs: if a SYSCALL instruction is at
+ * the highest possible canonical userspace address, then that
+ * syscall will enter the kernel with a non-canonical return
+ * address, and SYSRET will explode dangerously. We avoid this
+ * particular problem by preventing anything from being mapped
+ * at the maximum canonical address.
  */
 #define TASK_SIZE_MAX	((1UL << 47) - PAGE_SIZE)
 
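A quick check of the arithmetic behind the new comment, assuming 4 KiB pages (PAGE_SIZE == 4096); the worked numbers below are illustrative, not taken from the patch:

/*
 * 1UL << 47                      = 0x0000800000000000
 * TASK_SIZE_MAX = (1<<47) - 4096 = 0x00007ffffffff000
 * highest canonical user address = 0x00007fffffffffff
 *
 * Keeping [0x00007ffffffff000, 0x0000800000000000) unmapped means no
 * SYSCALL instruction can sit in the very last user page, so the saved
 * return address (the byte after the 2-byte SYSCALL opcode) always
 * stays canonical.
 */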
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 3c3366c2e37f..e7e9682a33e9 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -70,4 +70,23 @@ static inline void gtod_write_end(struct vsyscall_gtod_data *s)
 	++s->seq;
 }
 
+#ifdef CONFIG_X86_64
+
+#define VGETCPU_CPU_MASK 0xfff
+
+static inline unsigned int __getcpu(void)
+{
+	unsigned int p;
+
+	/*
+	 * Load per CPU data from GDT. LSL is faster than RDTSCP and
+	 * works on all CPUs.
+	 */
+	asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
+
+	return p;
+}
+
+#endif /* CONFIG_X86_64 */
+
 #endif /* _ASM_X86_VGTOD_H */
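__getcpu() returns the raw segment limit that vgetcpu_cpu_init() (see the arch/x86/vdso/vma.c hunk later in this diff) programs into the per-CPU GDT entry. A hypothetical decode helper, sketched here for illustration only (the real consumer is the vDSO getcpu path), splits that value with a mask and a shift:

/* Hypothetical helper, not part of this patch. */
#define VGETCPU_CPU_MASK 0xfff          /* matches the definition above */

static inline void decode_getcpu(unsigned int p,
                                 unsigned int *cpu, unsigned int *node)
{
        if (cpu)
                *cpu = p & VGETCPU_CPU_MASK;    /* bits 0-11: CPU number */
        if (node)
                *node = p >> 12;                /* bits 12-19: NUMA node */
}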
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index 2a46ca720afc..6ba66ee79710 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -4,15 +4,7 @@
 #include <linux/seqlock.h>
 #include <uapi/asm/vsyscall.h>
 
-#define VGETCPU_RDTSCP	1
-#define VGETCPU_LSL	2
-
-/* kernel space (writeable) */
-extern int vgetcpu_mode;
-extern struct timezone sys_tz;
-
-#include <asm/vvar.h>
-
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
 extern void map_vsyscall(void);
 
 /*
@@ -20,25 +12,12 @@ extern void map_vsyscall(void);
  * Returns true if handled.
  */
 extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
-
-#ifdef CONFIG_X86_64
-
-#define VGETCPU_CPU_MASK 0xfff
-
-static inline unsigned int __getcpu(void)
+#else
+static inline void map_vsyscall(void) {}
+static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 {
-	unsigned int p;
-
-	if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
-		/* Load per CPU data from RDTSCP */
-		native_read_tscp(&p);
-	} else {
-		/* Load per CPU data from GDT */
-		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
-	}
-
-	return p;
+	return false;
 }
-#endif /* CONFIG_X86_64 */
+#endif
 
 #endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 5d2b9ad2c6d2..3f32dfc2ab73 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -44,8 +44,6 @@ extern char __vvar_page;
 
 /* DECLARE_VVAR(offset, type, name) */
 
-DECLARE_VVAR(0, volatile unsigned long, jiffies)
-DECLARE_VVAR(16, int, vgetcpu_mode)
 DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
 
 #undef DECLARE_VVAR
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8f1e77440b2b..5d4502c8b983 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -28,8 +28,7 @@ obj-$(CONFIG_X86_32) += i386_ksyms_32.o
 obj-$(CONFIG_X86_64)    += sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)    += mcount_64.o
 obj-y                   += syscall_$(BITS).o vsyscall_gtod.o
-obj-$(CONFIG_X86_64)    += vsyscall_64.o
-obj-$(CONFIG_X86_64)    += vsyscall_emu_64.o
+obj-$(CONFIG_X86_VSYSCALL_EMULATION)    += vsyscall_64.o vsyscall_emu_64.o
 obj-$(CONFIG_X86_ESPFIX64)      += espfix_64.o
 obj-$(CONFIG_SYSFS)     += ksysfs.o
 obj-y                   += bootflag.o e820.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cfa9b5b2c27a..c6049650c093 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -958,14 +958,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 }
 
 #ifdef CONFIG_X86_64
-static void vgetcpu_set_mode(void)
-{
-	if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
-		vgetcpu_mode = VGETCPU_RDTSCP;
-	else
-		vgetcpu_mode = VGETCPU_LSL;
-}
-
 #ifdef CONFIG_IA32_EMULATION
 /* May not be __init: called during resume */
 static void syscall32_cpu_init(void)
@@ -1008,8 +1000,6 @@ void __init identify_boot_cpu(void)
 #ifdef CONFIG_X86_32
 	sysenter_setup();
 	enable_sep_cpu();
-#else
-	vgetcpu_set_mode();
 #endif
 	cpu_detect_tlb(&boot_cpu_data);
 }
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 214245d6b996..ab4734e5411d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1192,9 +1192,7 @@ void __init setup_arch(char **cmdline_p)
 
 	tboot_probe();
 
-#ifdef CONFIG_X86_64
 	map_vsyscall();
-#endif
 
 	generic_apic_probe();
 
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 0fa29609b2c4..25adc0e16eaa 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -23,7 +23,7 @@
 #include <asm/time.h>
 
 #ifdef CONFIG_X86_64
-__visible DEFINE_VVAR(volatile unsigned long, jiffies) = INITIAL_JIFFIES;
+__visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES;
 #endif
 
 unsigned long profile_pc(struct pt_regs *regs)
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 957779f4eb40..2dcc6ff6fdcc 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -1,59 +1,43 @@
 /*
+ * Copyright (c) 2012-2014 Andy Lutomirski <luto@amacapital.net>
+ *
+ * Based on the original implementation which is:
  * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
  * Copyright 2003 Andi Kleen, SuSE Labs.
  *
- * [ NOTE: this mechanism is now deprecated in favor of the vDSO. ]
+ *  Parts of the original code have been moved to arch/x86/vdso/vma.c
+ *
+ * This file implements vsyscall emulation. vsyscalls are a legacy ABI:
+ * Userspace can request certain kernel services by calling fixed
+ * addresses. This concept is problematic:
  *
- * Thanks to hpa@transmeta.com for some useful hint.
- * Special thanks to Ingo Molnar for his early experience with
- * a different vsyscall implementation for Linux/IA32 and for the name.
+ * - It interferes with ASLR.
+ * - It's awkward to write code that lives in kernel addresses but is
+ *   callable by userspace at fixed addresses.
+ * - The whole concept is impossible for 32-bit compat userspace.
+ * - UML cannot easily virtualize a vsyscall.
  *
- * vsyscall 1 is located at -10Mbyte, vsyscall 2 is located
- * at virtual address -10Mbyte+1024bytes etc... There are at max 4
- * vsyscalls. One vsyscall can reserve more than 1 slot to avoid
- * jumping out of line if necessary. We cannot add more with this
- * mechanism because older kernels won't return -ENOSYS.
+ * As of mid-2014, I believe that there is no new userspace code that
+ * will use a vsyscall if the vDSO is present. I hope that there will
+ * soon be no new userspace code that will ever use a vsyscall.
  *
- * Note: the concept clashes with user mode linux. UML users should
- * use the vDSO.
+ * The code in this file emulates vsyscalls when notified of a page
+ * fault to a vsyscall address.
  */
 
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/time.h>
-#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/timer.h>
-#include <linux/seqlock.h>
-#include <linux/jiffies.h>
-#include <linux/sysctl.h>
-#include <linux/topology.h>
-#include <linux/timekeeper_internal.h>
-#include <linux/getcpu.h>
-#include <linux/cpu.h>
-#include <linux/smp.h>
-#include <linux/notifier.h>
 #include <linux/syscalls.h>
 #include <linux/ratelimit.h>
 
 #include <asm/vsyscall.h>
-#include <asm/pgtable.h>
-#include <asm/compat.h>
-#include <asm/page.h>
 #include <asm/unistd.h>
 #include <asm/fixmap.h>
-#include <asm/errno.h>
-#include <asm/io.h>
-#include <asm/segment.h>
-#include <asm/desc.h>
-#include <asm/topology.h>
 #include <asm/traps.h>
 
 #define CREATE_TRACE_POINTS
 #include "vsyscall_trace.h"
 
-DEFINE_VVAR(int, vgetcpu_mode);
-
 static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
 
 static int __init vsyscall_setup(char *str)
@@ -222,6 +206,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
222 "seccomp tried to change syscall nr or ip"); 206 "seccomp tried to change syscall nr or ip");
223 do_exit(SIGSYS); 207 do_exit(SIGSYS);
224 } 208 }
209 regs->orig_ax = -1;
225 if (tmp) 210 if (tmp)
226 goto do_ret; /* skip requested */ 211 goto do_ret; /* skip requested */
227 212
@@ -284,46 +269,54 @@ sigsegv:
 }
 
 /*
- * Assume __initcall executes before all user space. Hopefully kmod
- * doesn't violate that. We'll find out if it does.
+ * A pseudo VMA to allow ptrace access for the vsyscall page. This only
+ * covers the 64bit vsyscall page now. 32bit has a real VMA now and does
+ * not need special handling anymore:
  */
-static void vsyscall_set_cpu(int cpu)
+static const char *gate_vma_name(struct vm_area_struct *vma)
 {
-	unsigned long d;
-	unsigned long node = 0;
-#ifdef CONFIG_NUMA
-	node = cpu_to_node(cpu);
-#endif
-	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
-		write_rdtscp_aux((node << 12) | cpu);
-
-	/*
-	 * Store cpu number in limit so that it can be loaded quickly
-	 * in user space in vgetcpu. (12 bits for the CPU and 8 bits for the node)
-	 */
-	d = 0x0f40000000000ULL;
-	d |= cpu;
-	d |= (node & 0xf) << 12;
-	d |= (node >> 4) << 48;
-
-	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
+	return "[vsyscall]";
 }
-
-static void cpu_vsyscall_init(void *arg)
+static struct vm_operations_struct gate_vma_ops = {
+	.name = gate_vma_name,
+};
+static struct vm_area_struct gate_vma = {
+	.vm_start	= VSYSCALL_ADDR,
+	.vm_end		= VSYSCALL_ADDR + PAGE_SIZE,
+	.vm_page_prot	= PAGE_READONLY_EXEC,
+	.vm_flags	= VM_READ | VM_EXEC,
+	.vm_ops		= &gate_vma_ops,
+};
+
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
 {
-	/* preemption should be already off */
-	vsyscall_set_cpu(raw_smp_processor_id());
+#ifdef CONFIG_IA32_EMULATION
+	if (!mm || mm->context.ia32_compat)
+		return NULL;
+#endif
+	if (vsyscall_mode == NONE)
+		return NULL;
+	return &gate_vma;
 }
 
-static int
-cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
 {
-	long cpu = (long)arg;
+	struct vm_area_struct *vma = get_gate_vma(mm);
+
+	if (!vma)
+		return 0;
 
-	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
-		smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
+	return (addr >= vma->vm_start) && (addr < vma->vm_end);
+}
 
-	return NOTIFY_DONE;
+/*
+ * Use this when you have no reliable mm, typically from interrupt
+ * context. It is less reliable than using a task's mm and may give
+ * false positives.
+ */
+int in_gate_area_no_mm(unsigned long addr)
+{
+	return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
 }
 
 void __init map_vsyscall(void)
@@ -331,24 +324,12 @@ void __init map_vsyscall(void)
 	extern char __vsyscall_page;
 	unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
 
-	__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
-		     vsyscall_mode == NATIVE
-		     ? PAGE_KERNEL_VSYSCALL
-		     : PAGE_KERNEL_VVAR);
+	if (vsyscall_mode != NONE)
+		__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
+			     vsyscall_mode == NATIVE
+			     ? PAGE_KERNEL_VSYSCALL
+			     : PAGE_KERNEL_VVAR);
+
 	BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
 		     (unsigned long)VSYSCALL_ADDR);
 }
-
-static int __init vsyscall_init(void)
-{
-	cpu_notifier_register_begin();
-
-	on_each_cpu(cpu_vsyscall_init, NULL, 1);
-	/* notifier priority > KVM */
-	__hotcpu_notifier(cpu_vsyscall_notifier, 30);
-
-	cpu_notifier_register_done();
-
-	return 0;
-}
-__initcall(vsyscall_init);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 78e53c80fc12..30eb05ae7061 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1204,55 +1204,6 @@ int kern_addr_valid(unsigned long addr)
 	return pfn_valid(pte_pfn(*pte));
 }
 
-/*
- * A pseudo VMA to allow ptrace access for the vsyscall page. This only
- * covers the 64bit vsyscall page now. 32bit has a real VMA now and does
- * not need special handling anymore:
- */
-static const char *gate_vma_name(struct vm_area_struct *vma)
-{
-	return "[vsyscall]";
-}
-static struct vm_operations_struct gate_vma_ops = {
-	.name = gate_vma_name,
-};
-static struct vm_area_struct gate_vma = {
-	.vm_start	= VSYSCALL_ADDR,
-	.vm_end		= VSYSCALL_ADDR + PAGE_SIZE,
-	.vm_page_prot	= PAGE_READONLY_EXEC,
-	.vm_flags	= VM_READ | VM_EXEC,
-	.vm_ops		= &gate_vma_ops,
-};
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-#ifdef CONFIG_IA32_EMULATION
-	if (!mm || mm->context.ia32_compat)
-		return NULL;
-#endif
-	return &gate_vma;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
-	struct vm_area_struct *vma = get_gate_vma(mm);
-
-	if (!vma)
-		return 0;
-
-	return (addr >= vma->vm_start) && (addr < vma->vm_end);
-}
-
-/*
- * Use this when you have no reliable mm, typically from interrupt
- * context. It is less reliable than using a task's mm and may give
- * false positives.
- */
-int in_gate_area_no_mm(unsigned long addr)
-{
-	return (addr & PAGE_MASK) == VSYSCALL_ADDR;
-}
-
 static unsigned long probe_memory_block_size(void)
 {
 	/* start from 2g */
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
index 2f94b039e55b..8ec3d1f4ce9a 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/vdso/vgetcpu.c
@@ -7,9 +7,7 @@
 
 #include <linux/kernel.h>
 #include <linux/getcpu.h>
-#include <linux/jiffies.h>
 #include <linux/time.h>
-#include <asm/vsyscall.h>
 #include <asm/vgtod.h>
 
 notrace long
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 970463b566cf..009495b9ab4b 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -1,7 +1,8 @@
 /*
- * Set up the VMAs to tell the VM about the vDSO.
  * Copyright 2007 Andi Kleen, SUSE Labs.
  * Subject to the GPL, v.2
+ *
+ * This contains most of the x86 vDSO kernel-side code.
  */
 #include <linux/mm.h>
 #include <linux/err.h>
@@ -10,17 +11,17 @@
 #include <linux/init.h>
 #include <linux/random.h>
 #include <linux/elf.h>
-#include <asm/vsyscall.h>
+#include <linux/cpu.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
 #include <asm/vdso.h>
+#include <asm/vvar.h>
 #include <asm/page.h>
 #include <asm/hpet.h>
+#include <asm/desc.h>
 
 #if defined(CONFIG_X86_64)
 unsigned int __read_mostly vdso64_enabled = 1;
-
-extern unsigned short vdso_sync_cpuid;
 #endif
 
 void __init init_vdso_image(const struct vdso_image *image)
@@ -38,20 +39,6 @@ void __init init_vdso_image(const struct vdso_image *image)
 			image->alt_len));
 }
 
-#if defined(CONFIG_X86_64)
-static int __init init_vdso(void)
-{
-	init_vdso_image(&vdso_image_64);
-
-#ifdef CONFIG_X86_X32_ABI
-	init_vdso_image(&vdso_image_x32);
-#endif
-
-	return 0;
-}
-subsys_initcall(init_vdso);
-#endif
-
 struct linux_binprm;
 
 /* Put the vdso above the (randomized) stack with another randomized offset.
@@ -238,3 +225,63 @@ static __init int vdso_setup(char *s)
 }
 __setup("vdso=", vdso_setup);
 #endif
+
+#ifdef CONFIG_X86_64
+static void vgetcpu_cpu_init(void *arg)
+{
+	int cpu = smp_processor_id();
+	struct desc_struct d = { };
+	unsigned long node = 0;
+#ifdef CONFIG_NUMA
+	node = cpu_to_node(cpu);
+#endif
+	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
+		write_rdtscp_aux((node << 12) | cpu);
+
+	/*
+	 * Store cpu number in limit so that it can be loaded
+	 * quickly in user space in vgetcpu. (12 bits for the CPU
+	 * and 8 bits for the node)
+	 */
+	d.limit0 = cpu | ((node & 0xf) << 12);
+	d.limit = node >> 4;
+	d.type = 5;		/* RO data, expand down, accessed */
+	d.dpl = 3;		/* Visible to user code */
+	d.s = 1;		/* Not a system segment */
+	d.p = 1;		/* Present */
+	d.d = 1;		/* 32-bit */
+
+	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
+}
+
+static int
+vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg)
+{
+	long cpu = (long)arg;
+
+	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
+		smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);
+
+	return NOTIFY_DONE;
+}
+
+static int __init init_vdso(void)
+{
+	init_vdso_image(&vdso_image_64);
+
+#ifdef CONFIG_X86_X32_ABI
+	init_vdso_image(&vdso_image_x32);
+#endif
+
+	cpu_notifier_register_begin();
+
+	on_each_cpu(vgetcpu_cpu_init, NULL, 1);
+	/* notifier priority > KVM */
+	__hotcpu_notifier(vgetcpu_cpu_notifier, 30);
+
+	cpu_notifier_register_done();
+
+	return 0;
+}
+subsys_initcall(init_vdso);
+#endif /* CONFIG_X86_64 */
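A worked example of the descriptor encoding in vgetcpu_cpu_init() above, with values chosen arbitrarily for illustration:

/*
 * CPU 3 on NUMA node 1:
 *
 *   packed value = cpu | (node << 12) = 3 | (1 << 12) = 0x01003
 *   d.limit0     = 0x1003   (limit bits  0-15)
 *   d.limit      = 0x0      (limit bits 16-19)
 *
 * LSL on __PER_CPU_SEG in __getcpu() later reads the segment limit back
 * as 0x01003 (granularity bit clear, so the limit is byte-granular),
 * and the caller recovers
 *
 *   cpu  = 0x01003 & VGETCPU_CPU_MASK = 3
 *   node = 0x01003 >> 12              = 1
 */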
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 9855eb8ee4b3..8c8298d78185 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1412,8 +1412,10 @@ static int xen_pgd_alloc(struct mm_struct *mm)
 		page->private = (unsigned long)user_pgd;
 
 		if (user_pgd != NULL) {
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
 			user_pgd[pgd_index(VSYSCALL_ADDR)] =
 				__pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
+#endif
 			ret = 0;
 		}
 
@@ -1976,7 +1978,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 # ifdef CONFIG_HIGHMEM
 	case FIX_KMAP_BEGIN ... FIX_KMAP_END:
 # endif
-#else
+#elif defined(CONFIG_X86_VSYSCALL_EMULATION)
 	case VSYSCALL_PAGE:
 #endif
 	case FIX_TEXT_POKE0:
@@ -2015,7 +2017,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 
 	__native_set_fixmap(idx, pte);
 
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
 	/* Replicate changes to map the vsyscall page into the user
 	   pagetable vsyscall mapping. */
 	if (idx == VSYSCALL_PAGE) {