author     Linus Torvalds <torvalds@linux-foundation.org>  2014-12-10 17:24:20 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2014-12-10 17:24:20 -0500
commit     3100e448e7d74489a96cb7b45d88fe6962774eaa (patch)
tree       53e46a702bd191ca43639b560d2bb1d3b0ad18c8 /arch
parent     c9f861c77269bc9950c16c6404a9476062241671 (diff)
parent     26893107aa717cd11010f0c278d02535defa1ac9 (diff)
Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 vdso updates from Ingo Molnar:
"Various vDSO updates from Andy Lutomirski, mostly cleanups and
reorganization to improve maintainability, but also some
micro-optimizations and robustization changes"
* 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86_64/vsyscall: Restore orig_ax after vsyscall seccomp
x86_64: Add a comment explaining the TASK_SIZE_MAX guard page
x86_64,vsyscall: Make vsyscall emulation configurable
x86_64, vsyscall: Rewrite comment and clean up headers in vsyscall code
x86_64, vsyscall: Turn vsyscalls all the way off when vsyscall==none
x86,vdso: Use LSL unconditionally for vgetcpu
x86: vdso: Fix build with older gcc
x86_64/vdso: Clean up vgetcpu init and merge the vdso initcalls
x86_64/vdso: Remove jiffies from the vvar page
x86/vdso: Make the PER_CPU segment 32 bits
x86/vdso: Make the PER_CPU segment start out accessed
x86/vdso: Change the PER_CPU segment to use struct desc_struct
x86_64/vdso: Move getcpu code from vsyscall_64.c to vdso/vma.c
x86_64/vsyscall: Move all of the gate_area code to vsyscall_64.c
Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/Kconfig                   18
-rw-r--r--  arch/x86/include/asm/fixmap.h       2
-rw-r--r--  arch/x86/include/asm/page_64.h      4
-rw-r--r--  arch/x86/include/asm/processor.h    8
-rw-r--r--  arch/x86/include/asm/vgtod.h       19
-rw-r--r--  arch/x86/include/asm/vsyscall.h    33
-rw-r--r--  arch/x86/include/asm/vvar.h         2
-rw-r--r--  arch/x86/kernel/Makefile            3
-rw-r--r--  arch/x86/kernel/cpu/common.c       10
-rw-r--r--  arch/x86/kernel/setup.c             2
-rw-r--r--  arch/x86/kernel/time.c              2
-rw-r--r--  arch/x86/kernel/vsyscall_64.c     147
-rw-r--r--  arch/x86/mm/init_64.c              49
-rw-r--r--  arch/x86/vdso/vgetcpu.c             2
-rw-r--r--  arch/x86/vdso/vma.c                83
-rw-r--r--  arch/x86/xen/mmu.c                  6
16 files changed, 190 insertions, 200 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 666ac6651c17..bea3a0159496 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -992,6 +992,24 @@ config X86_ESPFIX64
 	def_bool y
 	depends on X86_16BIT && X86_64
 
+config X86_VSYSCALL_EMULATION
+	bool "Enable vsyscall emulation" if EXPERT
+	default y
+	depends on X86_64
+	---help---
+	 This enables emulation of the legacy vsyscall page. Disabling
+	 it is roughly equivalent to booting with vsyscall=none, except
+	 that it will also disable the helpful warning if a program
+	 tries to use a vsyscall.  With this option set to N, offending
+	 programs will just segfault, citing addresses of the form
+	 0xffffffffff600?00.
+
+	 This option is required by many programs built before 2013, and
+	 care should be used even with newer programs if set to N.
+
+	 Disabling this option saves about 7K of kernel size and
+	 possibly 4K of additional runtime pagetable memory.
+
 config TOSHIBA
 	tristate "Toshiba Laptop support"
 	depends on X86_32
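For context on the help text above: a pre-2013 binary that bypasses the vDSO calls one of the fixed vsyscall entry points directly. The following is an illustrative editorial sketch of that calling pattern, not part of the patch; the address 0xffffffffff600000 is the legacy gettimeofday slot in the vsyscall page, and the vgtod_t typedef is invented here for clarity. With this option set to N, the call below segfaults at exactly the kind of 0xffffffffff600?00 address the help text mentions.

/* Illustrative sketch: how a legacy binary calls the vsyscall page. */
#include <stdio.h>
#include <sys/time.h>

typedef long (*vgtod_t)(struct timeval *, struct timezone *);

int main(void)
{
	struct timeval tv;
	/* Fixed legacy vsyscall address; x86-64 only. */
	vgtod_t vgtod = (vgtod_t)0xffffffffff600000UL;

	/* Faults into the kernel and is emulated (or kills the process). */
	if (vgtod(&tv, NULL) == 0)
		printf("%ld.%06ld\n", (long)tv.tv_sec, (long)tv.tv_usec);
	return 0;
}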
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index bf728e49c53c..f80d70009ff8 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -69,7 +69,9 @@ enum fixed_addresses {
 #ifdef CONFIG_X86_32
 	FIX_HOLE,
 #else
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
 	VSYSCALL_PAGE = (FIXADDR_TOP - VSYSCALL_ADDR) >> PAGE_SHIFT,
+#endif
 #ifdef CONFIG_PARAVIRT_CLOCK
 	PVCLOCK_FIXMAP_BEGIN,
 	PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index f408caf73430..b3bebf9e5746 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -39,6 +39,8 @@ void copy_page(void *to, void *from);
 
 #endif	/* !__ASSEMBLY__ */
 
-#define __HAVE_ARCH_GATE_AREA 1
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
+# define __HAVE_ARCH_GATE_AREA 1
+#endif
 
 #endif /* _ASM_X86_PAGE_64_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 25b8de0f21c0..a092a0cce0b7 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -894,7 +894,13 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
 
 #else
 /*
- * User space process size. 47bits minus one guard page.
+ * User space process size. 47bits minus one guard page.  The guard
+ * page is necessary on Intel CPUs: if a SYSCALL instruction is at
+ * the highest possible canonical userspace address, then that
+ * syscall will enter the kernel with a non-canonical return
+ * address, and SYSRET will explode dangerously.  We avoid this
+ * particular problem by preventing anything from being mapped
+ * at the maximum canonical address.
 */
 #define TASK_SIZE_MAX	((1UL << 47) - PAGE_SIZE)
 
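The new comment is easier to see with the arithmetic written out. Below is a small editorial sketch, not part of the patch, that checks the invariant the guard page enforces (it assumes 4-level paging with a 47-bit user address space and 4 KiB pages, as the patched code does):

/* Illustrative sketch: the TASK_SIZE_MAX guard-page arithmetic. */
#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define TASK_SIZE_MAX	((1UL << 47) - PAGE_SIZE)

int main(void)
{
	/* Highest canonical userspace address with 4-level paging. */
	unsigned long canon_max = (1UL << 47) - 1;

	/*
	 * If a 2-byte SYSCALL instruction ended at canon_max, the
	 * return address would be canon_max + 1 == 1UL << 47, which
	 * is non-canonical, and SYSRET would fault in kernel mode.
	 */
	assert(canon_max + 1 == (1UL << 47));

	/* The guard page keeps every mappable byte out of that window. */
	printf("last usable user address: %#lx\n", TASK_SIZE_MAX - 1);
	assert(TASK_SIZE_MAX - 1 < canon_max - PAGE_SIZE);
	return 0;
}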
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 3c3366c2e37f..e7e9682a33e9 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -70,4 +70,23 @@ static inline void gtod_write_end(struct vsyscall_gtod_data *s)
 	++s->seq;
 }
 
+#ifdef CONFIG_X86_64
+
+#define VGETCPU_CPU_MASK 0xfff
+
+static inline unsigned int __getcpu(void)
+{
+	unsigned int p;
+
+	/*
+	 * Load per CPU data from GDT.  LSL is faster than RDTSCP and
+	 * works on all CPUs.
+	 */
+	asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
+
+	return p;
+}
+
+#endif /* CONFIG_X86_64 */
+
 #endif /* _ASM_X86_VGTOD_H */
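The value __getcpu() returns is consumed by the vDSO's getcpu: the low 12 bits (VGETCPU_CPU_MASK) are the CPU number and the bits above them are the node, matching the encoding written into the segment limit. A short user-space sketch of that decode follows; it is illustrative only, and decode_getcpu is a made-up helper name:

/* Illustrative sketch: splitting the __getcpu() result. */
#include <stdio.h>

#define VGETCPU_CPU_MASK 0xfff

static void decode_getcpu(unsigned int p)
{
	unsigned int cpu  = p & VGETCPU_CPU_MASK;	/* low 12 bits */
	unsigned int node = p >> 12;			/* the rest */

	printf("cpu=%u node=%u\n", cpu, node);
}

int main(void)
{
	decode_getcpu((1 << 12) | 3);	/* prints cpu=3 node=1 */
	return 0;
}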
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index 2a46ca720afc..6ba66ee79710 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -4,15 +4,7 @@
 #include <linux/seqlock.h>
 #include <uapi/asm/vsyscall.h>
 
-#define VGETCPU_RDTSCP	1
-#define VGETCPU_LSL	2
-
-/* kernel space (writeable) */
-extern int vgetcpu_mode;
-extern struct timezone sys_tz;
-
-#include <asm/vvar.h>
-
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
 extern void map_vsyscall(void);
 
 /*
@@ -20,25 +12,12 @@ extern void map_vsyscall(void);
  * Returns true if handled.
  */
 extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
-
-#ifdef CONFIG_X86_64
-
-#define VGETCPU_CPU_MASK 0xfff
-
-static inline unsigned int __getcpu(void)
+#else
+static inline void map_vsyscall(void) {}
+static inline bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 {
-	unsigned int p;
-
-	if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
-		/* Load per CPU data from RDTSCP */
-		native_read_tscp(&p);
-	} else {
-		/* Load per CPU data from GDT */
-		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
-	}
-
-	return p;
+	return false;
 }
-#endif /* CONFIG_X86_64 */
+#endif
 
 #endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 5d2b9ad2c6d2..3f32dfc2ab73 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -44,8 +44,6 @@ extern char __vvar_page;
 
 /* DECLARE_VVAR(offset, type, name) */
 
-DECLARE_VVAR(0, volatile unsigned long, jiffies)
-DECLARE_VVAR(16, int, vgetcpu_mode)
 DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
 
 #undef DECLARE_VVAR
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8f1e77440b2b..5d4502c8b983 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -28,8 +28,7 @@ obj-$(CONFIG_X86_32)	+= i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= mcount_64.o
 obj-y			+= syscall_$(BITS).o vsyscall_gtod.o
-obj-$(CONFIG_X86_64)	+= vsyscall_64.o
-obj-$(CONFIG_X86_64)	+= vsyscall_emu_64.o
+obj-$(CONFIG_X86_VSYSCALL_EMULATION)	+= vsyscall_64.o vsyscall_emu_64.o
 obj-$(CONFIG_X86_ESPFIX64)	+= espfix_64.o
 obj-$(CONFIG_SYSFS)	+= ksysfs.o
 obj-y			+= bootflag.o e820.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cfa9b5b2c27a..c6049650c093 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -958,14 +958,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 }
 
 #ifdef CONFIG_X86_64
-static void vgetcpu_set_mode(void)
-{
-	if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
-		vgetcpu_mode = VGETCPU_RDTSCP;
-	else
-		vgetcpu_mode = VGETCPU_LSL;
-}
-
 #ifdef CONFIG_IA32_EMULATION
 /* May not be __init: called during resume */
 static void syscall32_cpu_init(void)
@@ -1008,8 +1000,6 @@ void __init identify_boot_cpu(void)
 #ifdef CONFIG_X86_32
 	sysenter_setup();
 	enable_sep_cpu();
-#else
-	vgetcpu_set_mode();
 #endif
 	cpu_detect_tlb(&boot_cpu_data);
 }
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 214245d6b996..ab4734e5411d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1192,9 +1192,7 @@ void __init setup_arch(char **cmdline_p)
 
 	tboot_probe();
 
-#ifdef CONFIG_X86_64
 	map_vsyscall();
-#endif
 
 	generic_apic_probe();
 
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 0fa29609b2c4..25adc0e16eaa 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -23,7 +23,7 @@
 #include <asm/time.h>
 
 #ifdef CONFIG_X86_64
-__visible DEFINE_VVAR(volatile unsigned long, jiffies) = INITIAL_JIFFIES;
+__visible volatile unsigned long jiffies __cacheline_aligned = INITIAL_JIFFIES;
 #endif
 
 unsigned long profile_pc(struct pt_regs *regs)
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 957779f4eb40..2dcc6ff6fdcc 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -1,59 +1,43 @@
 /*
+ * Copyright (c) 2012-2014 Andy Lutomirski <luto@amacapital.net>
+ *
+ * Based on the original implementation which is:
  * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
  * Copyright 2003 Andi Kleen, SuSE Labs.
  *
- * [ NOTE: this mechanism is now deprecated in favor of the vDSO. ]
+ * Parts of the original code have been moved to arch/x86/vdso/vma.c
+ *
+ * This file implements vsyscall emulation.  vsyscalls are a legacy ABI:
+ * Userspace can request certain kernel services by calling fixed
+ * addresses.  This concept is problematic:
  *
- * Thanks to hpa@transmeta.com for some useful hint.
- * Special thanks to Ingo Molnar for his early experience with
- * a different vsyscall implementation for Linux/IA32 and for the name.
+ * - It interferes with ASLR.
+ * - It's awkward to write code that lives in kernel addresses but is
+ *   callable by userspace at fixed addresses.
+ * - The whole concept is impossible for 32-bit compat userspace.
+ * - UML cannot easily virtualize a vsyscall.
  *
- * vsyscall 1 is located at -10Mbyte, vsyscall 2 is located
- * at virtual address -10Mbyte+1024bytes etc... There are at max 4
- * vsyscalls. One vsyscall can reserve more than 1 slot to avoid
- * jumping out of line if necessary. We cannot add more with this
- * mechanism because older kernels won't return -ENOSYS.
+ * As of mid-2014, I believe that there is no new userspace code that
+ * will use a vsyscall if the vDSO is present.  I hope that there will
+ * soon be no new userspace code that will ever use a vsyscall.
  *
- * Note: the concept clashes with user mode linux. UML users should
- * use the vDSO.
+ * The code in this file emulates vsyscalls when notified of a page
+ * fault to a vsyscall address.
  */
 
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/time.h>
-#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/timer.h>
-#include <linux/seqlock.h>
-#include <linux/jiffies.h>
-#include <linux/sysctl.h>
-#include <linux/topology.h>
-#include <linux/timekeeper_internal.h>
-#include <linux/getcpu.h>
-#include <linux/cpu.h>
-#include <linux/smp.h>
-#include <linux/notifier.h>
 #include <linux/syscalls.h>
 #include <linux/ratelimit.h>
 
 #include <asm/vsyscall.h>
-#include <asm/pgtable.h>
-#include <asm/compat.h>
-#include <asm/page.h>
 #include <asm/unistd.h>
 #include <asm/fixmap.h>
-#include <asm/errno.h>
-#include <asm/io.h>
-#include <asm/segment.h>
-#include <asm/desc.h>
-#include <asm/topology.h>
 #include <asm/traps.h>
 
 #define CREATE_TRACE_POINTS
 #include "vsyscall_trace.h"
 
-DEFINE_VVAR(int, vgetcpu_mode);
-
 static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
 
 static int __init vsyscall_setup(char *str)
@@ -222,6 +206,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
 			"seccomp tried to change syscall nr or ip");
 		do_exit(SIGSYS);
 	}
+	regs->orig_ax = -1;
 	if (tmp)
 		goto do_ret;  /* skip requested */
 
@@ -284,46 +269,54 @@ sigsegv:
 }
 
 /*
- * Assume __initcall executes before all user space. Hopefully kmod
- * doesn't violate that. We'll find out if it does.
+ * A pseudo VMA to allow ptrace access for the vsyscall page.  This only
+ * covers the 64bit vsyscall page now. 32bit has a real VMA now and does
+ * not need special handling anymore:
  */
-static void vsyscall_set_cpu(int cpu)
+static const char *gate_vma_name(struct vm_area_struct *vma)
 {
-	unsigned long d;
-	unsigned long node = 0;
-#ifdef CONFIG_NUMA
-	node = cpu_to_node(cpu);
-#endif
-	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
-		write_rdtscp_aux((node << 12) | cpu);
-
-	/*
-	 * Store cpu number in limit so that it can be loaded quickly
-	 * in user space in vgetcpu. (12 bits for the CPU and 8 bits for the node)
-	 */
-	d = 0x0f40000000000ULL;
-	d |= cpu;
-	d |= (node & 0xf) << 12;
-	d |= (node >> 4) << 48;
-
-	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
+	return "[vsyscall]";
 }
+static struct vm_operations_struct gate_vma_ops = {
+	.name = gate_vma_name,
+};
+static struct vm_area_struct gate_vma = {
+	.vm_start	= VSYSCALL_ADDR,
+	.vm_end		= VSYSCALL_ADDR + PAGE_SIZE,
+	.vm_page_prot	= PAGE_READONLY_EXEC,
+	.vm_flags	= VM_READ | VM_EXEC,
+	.vm_ops		= &gate_vma_ops,
+};
 
-static void cpu_vsyscall_init(void *arg)
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
 {
-	/* preemption should be already off */
-	vsyscall_set_cpu(raw_smp_processor_id());
+#ifdef CONFIG_IA32_EMULATION
+	if (!mm || mm->context.ia32_compat)
+		return NULL;
+#endif
+	if (vsyscall_mode == NONE)
+		return NULL;
+	return &gate_vma;
 }
 
-static int
-cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
 {
-	long cpu = (long)arg;
+	struct vm_area_struct *vma = get_gate_vma(mm);
+
+	if (!vma)
+		return 0;
 
-	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
-		smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
+	return (addr >= vma->vm_start) && (addr < vma->vm_end);
+}
 
-	return NOTIFY_DONE;
+/*
+ * Use this when you have no reliable mm, typically from interrupt
+ * context.  It is less reliable than using a task's mm and may give
+ * false positives.
+ */
+int in_gate_area_no_mm(unsigned long addr)
+{
+	return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
 }
 
 void __init map_vsyscall(void)
@@ -331,24 +324,12 @@ void __init map_vsyscall(void)
 	extern char __vsyscall_page;
 	unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
 
-	__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
-		     vsyscall_mode == NATIVE
-		     ? PAGE_KERNEL_VSYSCALL
-		     : PAGE_KERNEL_VVAR);
+	if (vsyscall_mode != NONE)
+		__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
+			     vsyscall_mode == NATIVE
+			     ? PAGE_KERNEL_VSYSCALL
+			     : PAGE_KERNEL_VVAR);
+
 	BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
 		     (unsigned long)VSYSCALL_ADDR);
 }
-
-static int __init vsyscall_init(void)
-{
-	cpu_notifier_register_begin();
-
-	on_each_cpu(cpu_vsyscall_init, NULL, 1);
-	/* notifier priority > KVM */
-	__hotcpu_notifier(cpu_vsyscall_notifier, 30);
-
-	cpu_notifier_register_done();
-
-	return 0;
-}
-__initcall(vsyscall_init);
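The gate VMA above is what names the vsyscall mapping for ptrace and /proc. One way to observe it (an illustrative editorial sketch, not part of the patch) is to scan /proc/self/maps for the "[vsyscall]" line that gate_vma_name() provides; with vsyscall=none, get_gate_vma() returns NULL and the line disappears:

/* Illustrative sketch: spotting the gate VMA from userspace. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[512];
	FILE *f = fopen("/proc/self/maps", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		if (strstr(line, "[vsyscall]"))
			fputs(line, stdout); /* ffffffffff600000-ffffffffff601000 ... */
	fclose(f);
	return 0;
}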
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 78e53c80fc12..30eb05ae7061 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1204,55 +1204,6 @@ int kern_addr_valid(unsigned long addr)
 	return pfn_valid(pte_pfn(*pte));
 }
 
-/*
- * A pseudo VMA to allow ptrace access for the vsyscall page.  This only
- * covers the 64bit vsyscall page now. 32bit has a real VMA now and does
- * not need special handling anymore:
- */
-static const char *gate_vma_name(struct vm_area_struct *vma)
-{
-	return "[vsyscall]";
-}
-static struct vm_operations_struct gate_vma_ops = {
-	.name = gate_vma_name,
-};
-static struct vm_area_struct gate_vma = {
-	.vm_start	= VSYSCALL_ADDR,
-	.vm_end		= VSYSCALL_ADDR + PAGE_SIZE,
-	.vm_page_prot	= PAGE_READONLY_EXEC,
-	.vm_flags	= VM_READ | VM_EXEC,
-	.vm_ops		= &gate_vma_ops,
-};
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-#ifdef CONFIG_IA32_EMULATION
-	if (!mm || mm->context.ia32_compat)
-		return NULL;
-#endif
-	return &gate_vma;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
-	struct vm_area_struct *vma = get_gate_vma(mm);
-
-	if (!vma)
-		return 0;
-
-	return (addr >= vma->vm_start) && (addr < vma->vm_end);
-}
-
-/*
- * Use this when you have no reliable mm, typically from interrupt
- * context.  It is less reliable than using a task's mm and may give
- * false positives.
- */
-int in_gate_area_no_mm(unsigned long addr)
-{
-	return (addr & PAGE_MASK) == VSYSCALL_ADDR;
-}
-
 static unsigned long probe_memory_block_size(void)
 {
 	/* start from 2g */
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
index 2f94b039e55b..8ec3d1f4ce9a 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/vdso/vgetcpu.c
@@ -7,9 +7,7 @@
 
 #include <linux/kernel.h>
 #include <linux/getcpu.h>
-#include <linux/jiffies.h>
 #include <linux/time.h>
-#include <asm/vsyscall.h>
 #include <asm/vgtod.h>
 
 notrace long
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 970463b566cf..009495b9ab4b 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -1,7 +1,8 @@
 /*
- * Set up the VMAs to tell the VM about the vDSO.
  * Copyright 2007 Andi Kleen, SUSE Labs.
  * Subject to the GPL, v.2
+ *
+ * This contains most of the x86 vDSO kernel-side code.
  */
 #include <linux/mm.h>
 #include <linux/err.h>
@@ -10,17 +11,17 @@
 #include <linux/init.h>
 #include <linux/random.h>
 #include <linux/elf.h>
-#include <asm/vsyscall.h>
+#include <linux/cpu.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
 #include <asm/vdso.h>
+#include <asm/vvar.h>
 #include <asm/page.h>
 #include <asm/hpet.h>
+#include <asm/desc.h>
 
 #if defined(CONFIG_X86_64)
 unsigned int __read_mostly vdso64_enabled = 1;
-
-extern unsigned short vdso_sync_cpuid;
 #endif
 
 void __init init_vdso_image(const struct vdso_image *image)
@@ -38,20 +39,6 @@ void __init init_vdso_image(const struct vdso_image *image)
 			image->alt_len));
 }
 
-#if defined(CONFIG_X86_64)
-static int __init init_vdso(void)
-{
-	init_vdso_image(&vdso_image_64);
-
-#ifdef CONFIG_X86_X32_ABI
-	init_vdso_image(&vdso_image_x32);
-#endif
-
-	return 0;
-}
-subsys_initcall(init_vdso);
-#endif
-
 struct linux_binprm;
 
 /* Put the vdso above the (randomized) stack with another randomized offset.
@@ -238,3 +225,63 @@ static __init int vdso_setup(char *s)
 }
 __setup("vdso=", vdso_setup);
 #endif
+
+#ifdef CONFIG_X86_64
+static void vgetcpu_cpu_init(void *arg)
+{
+	int cpu = smp_processor_id();
+	struct desc_struct d = { };
+	unsigned long node = 0;
+#ifdef CONFIG_NUMA
+	node = cpu_to_node(cpu);
+#endif
+	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
+		write_rdtscp_aux((node << 12) | cpu);
+
+	/*
+	 * Store cpu number in limit so that it can be loaded
+	 * quickly in user space in vgetcpu. (12 bits for the CPU
+	 * and 8 bits for the node)
+	 */
+	d.limit0 = cpu | ((node & 0xf) << 12);
+	d.limit = node >> 4;
+	d.type = 5;		/* RO data, expand down, accessed */
+	d.dpl = 3;		/* Visible to user code */
+	d.s = 1;		/* Not a system segment */
+	d.p = 1;		/* Present */
+	d.d = 1;		/* 32-bit */
+
+	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
+}
+
+static int
+vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg)
+{
+	long cpu = (long)arg;
+
+	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
+		smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);
+
+	return NOTIFY_DONE;
+}
+
+static int __init init_vdso(void)
+{
+	init_vdso_image(&vdso_image_64);
+
+#ifdef CONFIG_X86_X32_ABI
+	init_vdso_image(&vdso_image_x32);
+#endif
+
+	cpu_notifier_register_begin();
+
+	on_each_cpu(vgetcpu_cpu_init, NULL, 1);
+	/* notifier priority > KVM */
+	__hotcpu_notifier(vgetcpu_cpu_notifier, 30);
+
+	cpu_notifier_register_done();
+
+	return 0;
+}
+subsys_initcall(init_vdso);
+#endif /* CONFIG_X86_64 */
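vgetcpu_cpu_init() packs the CPU and node into the 20-bit segment limit that LSL later reads back in __getcpu(): limit0 holds the low 16 bits and the 4-bit limit field the high bits (granularity is not set, so LSL returns the raw limit). A sketch of that encoding and its round trip follows; it is illustrative only, and encode_limit is a made-up helper:

/* Illustrative sketch: the PER_CPU segment-limit encoding. */
#include <assert.h>

static unsigned int encode_limit(unsigned int cpu, unsigned int node)
{
	unsigned int limit0 = (cpu | ((node & 0xf) << 12)) & 0xffff;
	unsigned int limit  = (node >> 4) & 0xf;

	return limit0 | (limit << 16);	/* the value LSL hands back */
}

int main(void)
{
	unsigned int p = encode_limit(5, 0x21);	/* cpu 5, node 33 */

	assert((p & 0xfff) == 5);	/* cpu in the low 12 bits */
	assert((p >> 12) == 0x21);	/* node above them */
	return 0;
}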
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 9855eb8ee4b3..8c8298d78185 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1412,8 +1412,10 @@ static int xen_pgd_alloc(struct mm_struct *mm)
 		page->private = (unsigned long)user_pgd;
 
 		if (user_pgd != NULL) {
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
 			user_pgd[pgd_index(VSYSCALL_ADDR)] =
 				__pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
+#endif
 			ret = 0;
 		}
 
@@ -1976,7 +1978,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 # ifdef CONFIG_HIGHMEM
 	case FIX_KMAP_BEGIN ... FIX_KMAP_END:
 # endif
-#else
+#elif defined(CONFIG_X86_VSYSCALL_EMULATION)
 	case VSYSCALL_PAGE:
 #endif
 	case FIX_TEXT_POKE0:
@@ -2015,7 +2017,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 
 	__native_set_fixmap(idx, pte);
 
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_VSYSCALL_EMULATION
 	/* Replicate changes to map the vsyscall page into the user
 	   pagetable vsyscall mapping. */
 	if (idx == VSYSCALL_PAGE) {