 30 files changed, 617 insertions(+), 507 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 121d5fcbd94a..2311dad7a57a 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3424,14 +3424,24 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			of CONFIG_HIGHPTE.
 
 	vdso=		[X86,SH]
-			vdso=2: enable compat VDSO (default with COMPAT_VDSO)
-			vdso=1: enable VDSO (default)
+			On X86_32, this is an alias for vdso32=.  Otherwise:
+
+			vdso=1: enable VDSO (the default)
 			vdso=0: disable VDSO mapping
 
-	vdso32=		[X86]
-			vdso32=2: enable compat VDSO (default with COMPAT_VDSO)
-			vdso32=1: enable 32-bit VDSO (default)
-			vdso32=0: disable 32-bit VDSO mapping
+	vdso32=		[X86] Control the 32-bit vDSO
+			vdso32=1: enable 32-bit VDSO
+			vdso32=0 or vdso32=2: disable 32-bit VDSO
+
+			See the help text for CONFIG_COMPAT_VDSO for more
+			details.  If CONFIG_COMPAT_VDSO is set, the default is
+			vdso32=0; otherwise, the default is vdso32=1.
+
+			For compatibility with older kernels, vdso32=2 is an
+			alias for vdso32=0.
+
+			Try vdso32=0 if you encounter an error that says:
+			dl_main: Assertion `(void *) ph->p_vaddr == _rtld_local._dl_sysinfo_dso' failed!
 
 	vector=		[IA-64,SMP]
 			vector=percpu: enable percpu vector domain
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 26237934ac87..ac04d9804391 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -107,9 +107,9 @@ config X86
 	select HAVE_ARCH_SOFT_DIRTY
 	select CLOCKSOURCE_WATCHDOG
 	select GENERIC_CLOCKEVENTS
-	select ARCH_CLOCKSOURCE_DATA if X86_64
+	select ARCH_CLOCKSOURCE_DATA
 	select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
-	select GENERIC_TIME_VSYSCALL if X86_64
+	select GENERIC_TIME_VSYSCALL
 	select KTIME_SCALAR if X86_32
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
@@ -1848,17 +1848,29 @@ config DEBUG_HOTPLUG_CPU0
 	  If unsure, say N.
 
 config COMPAT_VDSO
-	def_bool y
-	prompt "Compat VDSO support"
+	def_bool n
+	prompt "Disable the 32-bit vDSO (needed for glibc 2.3.3)"
 	depends on X86_32 || IA32_EMULATION
 	---help---
-	  Map the 32-bit VDSO to the predictable old-style address too.
+	  Certain buggy versions of glibc will crash if they are
+	  presented with a 32-bit vDSO that is not mapped at the address
+	  indicated in its segment table.
 
-	  Say N here if you are running a sufficiently recent glibc
-	  version (2.3.3 or later), to remove the high-mapped
-	  VDSO mapping and to exclusively use the randomized VDSO.
+	  The bug was introduced by f866314b89d56845f55e6f365e18b31ec978ec3a
+	  and fixed by 3b3ddb4f7db98ec9e912ccdf54d35df4aa30e04a and
+	  49ad572a70b8aeb91e57483a11dd1b77e31c4468.  Glibc 2.3.3 is
+	  the only released version with the bug, but OpenSUSE 9
+	  contains a buggy "glibc 2.3.2".
 
-	  If unsure, say Y.
+	  The symptom of the bug is that everything crashes on startup, saying:
+	  dl_main: Assertion `(void *) ph->p_vaddr == _rtld_local._dl_sysinfo_dso' failed!
+
+	  Saying Y here changes the default value of the vdso32 boot
+	  option from 1 to 0, which turns off the 32-bit vDSO entirely.
+	  This works around the glibc bug but hurts performance.
+
+	  If unsure, say N: if you are compiling your own kernel, you
+	  are unlikely to be using a buggy version of glibc.
 
 config CMDLINE_BOOL
 	bool "Built-in kernel command line"
diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h
index 16a57f4ed64d..eda81dc0f4ae 100644
--- a/arch/x86/include/asm/clocksource.h
+++ b/arch/x86/include/asm/clocksource.h
@@ -3,8 +3,6 @@
 #ifndef _ASM_X86_CLOCKSOURCE_H
 #define _ASM_X86_CLOCKSOURCE_H
 
-#ifdef CONFIG_X86_64
-
 #define VCLOCK_NONE 0  /* No vDSO clock available.  */
 #define VCLOCK_TSC  1  /* vDSO should use vread_tsc.  */
 #define VCLOCK_HPET 2  /* vDSO should use vread_hpet. */
@@ -14,6 +12,4 @@ struct arch_clocksource_data {
 	int vclock_mode;
 };
 
-#endif /* CONFIG_X86_64 */
-
 #endif /* _ASM_X86_CLOCKSOURCE_H */
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 9c999c1674fa..2c71182d30ef 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -281,16 +281,12 @@ do {						\
 
 #define STACK_RND_MASK (0x7ff)
 
-#define VDSO_HIGH_BASE		(__fix_to_virt(FIX_VDSO))
-
 #define ARCH_DLINFO		ARCH_DLINFO_IA32(vdso_enabled)
 
 /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
 
 #else /* CONFIG_X86_32 */
 
-#define VDSO_HIGH_BASE		0xffffe000U /* CONFIG_COMPAT_VDSO address */
-
 /* 1GB for 64bit, 8MB for 32bit */
 #define STACK_RND_MASK (test_thread_flag(TIF_ADDR32) ? 0x7ff : 0x3fffff)
 
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 7252cd339175..2377f5618fb7 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -40,15 +40,8 @@
  */
 extern unsigned long __FIXADDR_TOP;
 #define FIXADDR_TOP	((unsigned long)__FIXADDR_TOP)
-
-#define FIXADDR_USER_START	__fix_to_virt(FIX_VDSO)
-#define FIXADDR_USER_END	__fix_to_virt(FIX_VDSO - 1)
 #else
 #define FIXADDR_TOP	(VSYSCALL_END-PAGE_SIZE)
-
-/* Only covers 32bit vsyscalls currently. Need another set for 64bit. */
-#define FIXADDR_USER_START	((unsigned long)VSYSCALL32_VSYSCALL)
-#define FIXADDR_USER_END	(FIXADDR_USER_START + PAGE_SIZE)
 #endif
 
 
@@ -74,7 +67,6 @@ extern unsigned long __FIXADDR_TOP;
 enum fixed_addresses {
 #ifdef CONFIG_X86_32
 	FIX_HOLE,
-	FIX_VDSO,
 #else
 	VSYSCALL_LAST_PAGE,
 	VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 708f19fb4fc7..eb3d44945133 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -214,13 +214,8 @@
 #ifdef CONFIG_X86_64
 #define __PAGE_KERNEL_IDENT_LARGE_EXEC	__PAGE_KERNEL_LARGE_EXEC
 #else
-/*
- * For PDE_IDENT_ATTR include USER bit. As the PDE and PTE protection
- * bits are combined, this will alow user to access the high address mapped
- * VDSO in the presence of CONFIG_COMPAT_VDSO
- */
 #define PTE_IDENT_ATTR	 0x003		/* PRESENT+RW */
-#define PDE_IDENT_ATTR	 0x067		/* PRESENT+RW+USER+DIRTY+ACCESSED */
+#define PDE_IDENT_ATTR	 0x063		/* PRESENT+RW+DIRTY+ACCESSED */
 #define PGD_IDENT_ATTR	 0x001		/* PRESENT (no other attributes) */
 #endif
 
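The PDE_IDENT_ATTR change above drops the USER bit (0x004), which was only needed so userspace could reach the fixmap-mapped compat vDSO. A standalone sketch of the bit arithmetic, not kernel code, using the standard x86 PTE flag values:

#include <stdio.h>

#define PTE_PRESENT  0x001
#define PTE_RW       0x002
#define PTE_USER     0x004
#define PTE_ACCESSED 0x020
#define PTE_DIRTY    0x040

int main(void)
{
	unsigned old_attr = PTE_PRESENT | PTE_RW | PTE_USER | PTE_ACCESSED | PTE_DIRTY;
	unsigned new_attr = PTE_PRESENT | PTE_RW | PTE_ACCESSED | PTE_DIRTY;

	printf("old PDE_IDENT_ATTR = 0x%03x\n", old_attr); /* prints 0x067 */
	printf("new PDE_IDENT_ATTR = 0x%03x\n", new_attr); /* prints 0x063 */
	return 0;
}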
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index fddb53d63915..d1dc55404ff1 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -1,8 +1,45 @@
 #ifndef _ASM_X86_VDSO_H
 #define _ASM_X86_VDSO_H
 
+#include <asm/page_types.h>
+#include <linux/linkage.h>
+
+#ifdef __ASSEMBLER__
+
+#define DEFINE_VDSO_IMAGE(symname, filename)				\
+__PAGE_ALIGNED_DATA ;							\
+	.globl symname##_start, symname##_end ;				\
+	.align PAGE_SIZE ;						\
+	symname##_start: ;						\
+	.incbin filename ;						\
+	symname##_end: ;						\
+	.align PAGE_SIZE /* extra data here leaks to userspace. */ ;	\
+									\
+.previous ;								\
+									\
+	.globl symname##_pages ;					\
+	.bss ;								\
+	.align 8 ;							\
+	.type symname##_pages, @object ;				\
+	symname##_pages: ;						\
+	.zero (symname##_end - symname##_start + PAGE_SIZE - 1) / PAGE_SIZE * (BITS_PER_LONG / 8) ; \
+	.size symname##_pages, .-symname##_pages
+
+#else
+
+#define DECLARE_VDSO_IMAGE(symname)					\
+	extern char symname##_start[], symname##_end[];			\
+	extern struct page *symname##_pages[]
+
 #if defined CONFIG_X86_32 || defined CONFIG_COMPAT
-extern const char VDSO32_PRELINK[];
+
+#include <asm/vdso32.h>
+
+DECLARE_VDSO_IMAGE(vdso32_int80);
+#ifdef CONFIG_COMPAT
+DECLARE_VDSO_IMAGE(vdso32_syscall);
+#endif
+DECLARE_VDSO_IMAGE(vdso32_sysenter);
 
 /*
  * Given a pointer to the vDSO image, find the pointer to VDSO32_name
@@ -11,8 +48,7 @@ extern const char VDSO32_PRELINK[];
 #define VDSO32_SYMBOL(base, name)					\
 ({									\
 	extern const char VDSO32_##name[];				\
-	(void __user *)(VDSO32_##name - VDSO32_PRELINK +		\
-			(unsigned long)(base));				\
+	(void __user *)(VDSO32_##name + (unsigned long)(base));		\
 })
 #endif
 
@@ -23,12 +59,8 @@ extern const char VDSO32_PRELINK[];
 extern void __user __kernel_sigreturn;
 extern void __user __kernel_rt_sigreturn;
 
-/*
- * These symbols are defined by vdso32.S to mark the bounds
- * of the ELF DSO images included therein.
- */
-extern const char vdso32_int80_start, vdso32_int80_end;
-extern const char vdso32_syscall_start, vdso32_syscall_end;
-extern const char vdso32_sysenter_start, vdso32_sysenter_end;
+void __init patch_vdso32(void *vdso, size_t len);
+
+#endif /* __ASSEMBLER__ */
 
 #endif /* _ASM_X86_VDSO_H */
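With the 32-bit images now linked at address zero, VDSO32_SYMBOL() above reduces to plain offset-plus-base arithmetic. A self-contained analogue of that resolution; the symbol offset and mapping base below are invented for illustration, not the kernel's real values:

#include <stdio.h>

/* Stand-in for a symbol's link-time address: with the image linked at 0,
 * it is simply the symbol's offset into the image. */
static const unsigned long VDSO32_example_offset = 0x420; /* hypothetical */

static void *vdso32_symbol(unsigned long base, unsigned long offset)
{
	return (void *)(base + offset);
}

int main(void)
{
	unsigned long base = 0xb7fde000UL; /* hypothetical mmap base */

	printf("resolved symbol at %p\n",
	       vdso32_symbol(base, VDSO32_example_offset));
	return 0;
}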
diff --git a/arch/x86/include/asm/vdso32.h b/arch/x86/include/asm/vdso32.h
new file mode 100644
index 000000000000..7efb7018406e
--- /dev/null
+++ b/arch/x86/include/asm/vdso32.h
@@ -0,0 +1,11 @@
+#ifndef _ASM_X86_VDSO32_H
+#define _ASM_X86_VDSO32_H
+
+#define VDSO_BASE_PAGE	0
+#define VDSO_VVAR_PAGE	1
+#define VDSO_HPET_PAGE	2
+#define VDSO_PAGES	3
+#define VDSO_PREV_PAGES	2
+#define VDSO_OFFSET(x)	((x) * PAGE_SIZE)
+
+#endif
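A sketch of the address layout those constants describe, assuming 4 KiB pages and an invented load address: per vdso-layout.lds.S further down, the vvar and HPET pages sit one and two pages below the vDSO text page:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define VDSO_BASE_PAGE	0
#define VDSO_VVAR_PAGE	1
#define VDSO_HPET_PAGE	2
#define VDSO_OFFSET(x)	((x) * PAGE_SIZE)

int main(void)
{
	unsigned long text = 0xb7fdf000UL; /* hypothetical vDSO text address */

	printf("text: 0x%lx\n", text - VDSO_OFFSET(VDSO_BASE_PAGE));
	printf("vvar: 0x%lx\n", text - VDSO_OFFSET(VDSO_VVAR_PAGE));
	printf("hpet: 0x%lx\n", text - VDSO_OFFSET(VDSO_HPET_PAGE));
	return 0;
}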
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 46e24d36b7da..3c3366c2e37f 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -1,30 +1,73 @@
 #ifndef _ASM_X86_VGTOD_H
 #define _ASM_X86_VGTOD_H
 
-#include <asm/vsyscall.h>
+#include <linux/compiler.h>
 #include <linux/clocksource.h>
 
+#ifdef BUILD_VDSO32_64
+typedef u64 gtod_long_t;
+#else
+typedef unsigned long gtod_long_t;
+#endif
+/*
+ * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time
+ * so be carefull by modifying this structure.
+ */
 struct vsyscall_gtod_data {
-	seqcount_t	seq;
+	unsigned seq;
 
-	struct { /* extract of a clocksource struct */
-		int	vclock_mode;
-		cycle_t	cycle_last;
-		cycle_t	mask;
-		u32	mult;
-		u32	shift;
-	} clock;
+	int vclock_mode;
+	cycle_t	cycle_last;
+	cycle_t	mask;
+	u32	mult;
+	u32	shift;
 
 	/* open coded 'struct timespec' */
-	time_t		wall_time_sec;
 	u64		wall_time_snsec;
+	gtod_long_t	wall_time_sec;
+	gtod_long_t	monotonic_time_sec;
 	u64		monotonic_time_snsec;
-	time_t		monotonic_time_sec;
+	gtod_long_t	wall_time_coarse_sec;
+	gtod_long_t	wall_time_coarse_nsec;
+	gtod_long_t	monotonic_time_coarse_sec;
+	gtod_long_t	monotonic_time_coarse_nsec;
 
-	struct timezone sys_tz;
-	struct timespec wall_time_coarse;
-	struct timespec monotonic_time_coarse;
+	int		tz_minuteswest;
+	int		tz_dsttime;
 };
 extern struct vsyscall_gtod_data vsyscall_gtod_data;
 
+static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
+{
+	unsigned ret;
+
+repeat:
+	ret = ACCESS_ONCE(s->seq);
+	if (unlikely(ret & 1)) {
+		cpu_relax();
+		goto repeat;
+	}
+	smp_rmb();
+	return ret;
+}
+
+static inline int gtod_read_retry(const struct vsyscall_gtod_data *s,
+					unsigned start)
+{
+	smp_rmb();
+	return unlikely(s->seq != start);
+}
+
+static inline void gtod_write_begin(struct vsyscall_gtod_data *s)
+{
+	++s->seq;
+	smp_wmb();
+}
+
+static inline void gtod_write_end(struct vsyscall_gtod_data *s)
+{
+	smp_wmb();
+	++s->seq;
+}
+
 #endif /* _ASM_X86_VGTOD_H */
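The four helpers added here implement a hand-rolled seqlock: writers make seq odd while updating, readers retry if seq changed underneath them. A minimal userspace sketch of the same protocol, with the kernel's barriers loosely approximated by GCC atomics (the struct and values are made up):

#include <stdio.h>

struct gtod_like {
	unsigned seq;
	unsigned long wall_time_sec;
};

static unsigned read_begin(const struct gtod_like *s)
{
	unsigned ret;

	/* spin while a write is in progress (odd sequence count) */
	while ((ret = __atomic_load_n(&s->seq, __ATOMIC_ACQUIRE)) & 1)
		;
	return ret;
}

static int read_retry(const struct gtod_like *s, unsigned start)
{
	return __atomic_load_n(&s->seq, __ATOMIC_ACQUIRE) != start;
}

int main(void)
{
	struct gtod_like g = { .seq = 0, .wall_time_sec = 0 };
	unsigned seq;
	unsigned long sec;

	/* writer side: seq goes odd, data is updated, seq goes even */
	__atomic_fetch_add(&g.seq, 1, __ATOMIC_RELEASE);
	g.wall_time_sec = 1234;
	__atomic_fetch_add(&g.seq, 1, __ATOMIC_RELEASE);

	/* reader side: the loop shape used by do_realtime() below */
	do {
		seq = read_begin(&g);
		sec = g.wall_time_sec;
	} while (read_retry(&g, seq));

	printf("read %lu at seq %u\n", sec, seq);
	return 0;
}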
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index d76ac40da206..081d909bc495 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -16,8 +16,8 @@
  * you mess up, the linker will catch it.)
  */
 
-/* Base address of vvars.  This is not ABI. */
-#define VVAR_ADDRESS (-10*1024*1024 - 4096)
+#ifndef _ASM_X86_VVAR_H
+#define _ASM_X86_VVAR_H
 
 #if defined(__VVAR_KERNEL_LDS)
 
@@ -29,16 +29,35 @@
 
 #else
 
+#ifdef BUILD_VDSO32
+
+#define DECLARE_VVAR(offset, type, name)				\
+	extern type vvar_ ## name __attribute__((visibility("hidden")));
+
+#define VVAR(name) (vvar_ ## name)
+
+#else
+
+extern char __vvar_page;
+
+/* Base address of vvars.  This is not ABI. */
+#ifdef CONFIG_X86_64
+#define VVAR_ADDRESS (-10*1024*1024 - 4096)
+#else
+#define VVAR_ADDRESS (&__vvar_page)
+#endif
+
 #define DECLARE_VVAR(offset, type, name)				\
 	static type const * const vvaraddr_ ## name =			\
 		(void *)(VVAR_ADDRESS + (offset));
 
+#define VVAR(name) (*vvaraddr_ ## name)
+#endif
+
 #define DEFINE_VVAR(type, name)						\
 	type name							\
 	__attribute__((section(".vvar_" #name), aligned(16))) __visible
 
-#define VVAR(name) (*vvaraddr_ ## name)
-
 #endif
 
 /* DECLARE_VVAR(offset, type, name) */
@@ -48,3 +67,5 @@ DECLARE_VVAR(16, int, vgetcpu_mode)
 DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
 
 #undef DECLARE_VVAR
+
+#endif
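Both DECLARE_VVAR flavors derive a per-variable name by token pasting; in the BUILD_VDSO32 flavor the linker script then supplies that symbol's address. A compilable demo of the pasting pattern alone (the variable here is an ordinary global, not linker-placed):

#include <stdio.h>

#define DECLARE_VVAR(offset, type, name) type vvar_ ## name;
#define VVAR(name) (vvar_ ## name)

DECLARE_VVAR(16, int, vgetcpu_mode)

int main(void)
{
	VVAR(vgetcpu_mode) = 1;		/* expands to vvar_vgetcpu_mode = 1 */
	printf("%d\n", VVAR(vgetcpu_mode));
	return 0;
}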
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index cb648c84b327..f4d96000d33a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -26,7 +26,7 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o
 obj-y			+= probe_roms.o
 obj-$(CONFIG_X86_32)	+= i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
-obj-y			+= syscall_$(BITS).o
+obj-y			+= syscall_$(BITS).o vsyscall_gtod.o
 obj-$(CONFIG_X86_64)	+= vsyscall_64.o
 obj-$(CONFIG_X86_64)	+= vsyscall_emu_64.o
 obj-$(CONFIG_SYSFS)	+= ksysfs.o
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 014618dbaa7b..93eed15a8fd4 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -752,9 +752,7 @@ static struct clocksource clocksource_hpet = {
 	.mask		= HPET_MASK,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
 	.resume		= hpet_resume_counter,
-#ifdef CONFIG_X86_64
 	.archdata	= { .vclock_mode = VCLOCK_HPET },
-#endif
 };
 
 static int hpet_clocksource_register(void)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 7a9296ab8834..57e5ce126d5a 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -984,9 +984,7 @@ static struct clocksource clocksource_tsc = {
 	.mask		= CLOCKSOURCE_MASK(64),
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS |
 			  CLOCK_SOURCE_MUST_VERIFY,
-#ifdef CONFIG_X86_64
 	.archdata	= { .vclock_mode = VCLOCK_TSC },
-#endif
 };
 
 void mark_tsc_unstable(char *reason)
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index da6b35a98260..49edf2dd3613 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -147,7 +147,6 @@ SECTIONS
 		_edata = .;
 	} :data
 
-#ifdef CONFIG_X86_64
 
 	. = ALIGN(PAGE_SIZE);
 	__vvar_page = .;
@@ -165,12 +164,15 @@ SECTIONS
 #undef __VVAR_KERNEL_LDS
 #undef EMIT_VVAR
 
+		/*
+		 * Pad the rest of the page with zeros.  Otherwise the loader
+		 * can leave garbage here.
+		 */
+		. = __vvar_beginning_hack + PAGE_SIZE;
 	} :data
 
 	. = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE);
 
-#endif /* CONFIG_X86_64 */
-
 	/* Init code and data - will be freed after init */
 	. = ALIGN(PAGE_SIZE);
 	.init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) {
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 1f96f9347ed9..9ea287666c65 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -47,14 +47,12 @@
 #include <asm/segment.h>
 #include <asm/desc.h>
 #include <asm/topology.h>
-#include <asm/vgtod.h>
 #include <asm/traps.h>
 
 #define CREATE_TRACE_POINTS
 #include "vsyscall_trace.h"
 
 DEFINE_VVAR(int, vgetcpu_mode);
-DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
 
 static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
 
@@ -77,48 +75,6 @@ static int __init vsyscall_setup(char *str)
 }
 early_param("vsyscall", vsyscall_setup);
 
-void update_vsyscall_tz(void)
-{
-	vsyscall_gtod_data.sys_tz = sys_tz;
-}
-
-void update_vsyscall(struct timekeeper *tk)
-{
-	struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
-
-	write_seqcount_begin(&vdata->seq);
-
-	/* copy vsyscall data */
-	vdata->clock.vclock_mode	= tk->clock->archdata.vclock_mode;
-	vdata->clock.cycle_last		= tk->clock->cycle_last;
-	vdata->clock.mask		= tk->clock->mask;
-	vdata->clock.mult		= tk->mult;
-	vdata->clock.shift		= tk->shift;
-
-	vdata->wall_time_sec		= tk->xtime_sec;
-	vdata->wall_time_snsec		= tk->xtime_nsec;
-
-	vdata->monotonic_time_sec	= tk->xtime_sec
-					+ tk->wall_to_monotonic.tv_sec;
-	vdata->monotonic_time_snsec	= tk->xtime_nsec
-					+ (tk->wall_to_monotonic.tv_nsec
-						<< tk->shift);
-	while (vdata->monotonic_time_snsec >=
-					(((u64)NSEC_PER_SEC) << tk->shift)) {
-		vdata->monotonic_time_snsec -=
-					((u64)NSEC_PER_SEC) << tk->shift;
-		vdata->monotonic_time_sec++;
-	}
-
-	vdata->wall_time_coarse.tv_sec	= tk->xtime_sec;
-	vdata->wall_time_coarse.tv_nsec	= (long)(tk->xtime_nsec >> tk->shift);
-
-	vdata->monotonic_time_coarse	= timespec_add(vdata->wall_time_coarse,
-							tk->wall_to_monotonic);
-
-	write_seqcount_end(&vdata->seq);
-}
-
 static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
 			      const char *message)
 {
@@ -374,7 +330,6 @@ void __init map_vsyscall(void)
 {
 	extern char __vsyscall_page;
 	unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
-	extern char __vvar_page;
 	unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page);
 
 	__set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall,
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
new file mode 100644
index 000000000000..f9c6e56e14b5
--- /dev/null
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
+ * Copyright 2003 Andi Kleen, SuSE Labs.
+ *
+ * Modified for x86 32 bit architecture by
+ * Stefani Seibold <stefani@seibold.net>
+ * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
+ *
+ * Thanks to hpa@transmeta.com for some useful hint.
+ * Special thanks to Ingo Molnar for his early experience with
+ * a different vsyscall implementation for Linux/IA32 and for the name.
+ *
+ */
+
+#include <linux/timekeeper_internal.h>
+#include <asm/vgtod.h>
+#include <asm/vvar.h>
+
+DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
+
+void update_vsyscall_tz(void)
+{
+	vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest;
+	vsyscall_gtod_data.tz_dsttime = sys_tz.tz_dsttime;
+}
+
+void update_vsyscall(struct timekeeper *tk)
+{
+	struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
+
+	gtod_write_begin(vdata);
+
+	/* copy vsyscall data */
+	vdata->vclock_mode	= tk->clock->archdata.vclock_mode;
+	vdata->cycle_last	= tk->clock->cycle_last;
+	vdata->mask		= tk->clock->mask;
+	vdata->mult		= tk->mult;
+	vdata->shift		= tk->shift;
+
+	vdata->wall_time_sec		= tk->xtime_sec;
+	vdata->wall_time_snsec		= tk->xtime_nsec;
+
+	vdata->monotonic_time_sec	= tk->xtime_sec
+					+ tk->wall_to_monotonic.tv_sec;
+	vdata->monotonic_time_snsec	= tk->xtime_nsec
+					+ (tk->wall_to_monotonic.tv_nsec
+						<< tk->shift);
+	while (vdata->monotonic_time_snsec >=
+					(((u64)NSEC_PER_SEC) << tk->shift)) {
+		vdata->monotonic_time_snsec -=
+					((u64)NSEC_PER_SEC) << tk->shift;
+		vdata->monotonic_time_sec++;
+	}
+
+	vdata->wall_time_coarse_sec	= tk->xtime_sec;
+	vdata->wall_time_coarse_nsec	= (long)(tk->xtime_nsec >> tk->shift);
+
+	vdata->monotonic_time_coarse_sec =
+		vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
+	vdata->monotonic_time_coarse_nsec =
+		vdata->wall_time_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
+
+	while (vdata->monotonic_time_coarse_nsec >= NSEC_PER_SEC) {
+		vdata->monotonic_time_coarse_nsec -= NSEC_PER_SEC;
+		vdata->monotonic_time_coarse_sec++;
+	}
+
+	gtod_write_end(vdata);
+}
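The *_snsec fields above hold shifted nanoseconds (ns << shift), so the writer never divides; readers add the scaled cycle delta and shift back down. A worked example of that arithmetic with invented mult/shift values:

#include <stdio.h>

typedef unsigned long long u64;

int main(void)
{
	u64 shift = 1;				/* hypothetical clocksource shift */
	u64 mult = 5;				/* hypothetical ns-per-cycle * 2^shift */
	u64 wall_time_snsec = 500ULL << shift;	/* 500 ns, stored pre-shifted */
	u64 delta_cycles = 100;

	/* the reader-side computation in vclock_gettime.c */
	u64 ns = (wall_time_snsec + delta_cycles * mult) >> shift;

	printf("ns = %llu\n", ns);		/* (1000 + 500) >> 1 = 750 */
	return 0;
}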
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index cfbdbdb4e173..bbb1d2259ecf 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -69,8 +69,8 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = {
 	"__per_cpu_load|"
 	"init_per_cpu__.*|"
 	"__end_rodata_hpage_align|"
-	"__vvar_page|"
 #endif
+	"__vvar_page|"
 	"_end)$"
 };
 
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 9206ac7961a5..c580d1210ffe 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -23,7 +23,8 @@ vobjs-$(VDSOX32-y) += $(vobjx32s-compat)
 vobj64s := $(filter-out $(vobjx32s-compat),$(vobjs-y))
 
 # files to link into kernel
-obj-$(VDSO64-y)		+= vma.o vdso.o
+obj-y			+= vma.o
+obj-$(VDSO64-y)		+= vdso.o
 obj-$(VDSOX32-y)	+= vdsox32.o
 obj-$(VDSO32-y)		+= vdso32.o vdso32-setup.o
 
@@ -138,7 +139,7 @@ override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
 
 targets += vdso32/vdso32.lds
 targets += $(vdso32-images) $(vdso32-images:=.dbg)
-targets += vdso32/note.o $(vdso32.so-y:%=vdso32/%.o)
+targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o)
 
 extra-y	+= $(vdso32-images)
 
@@ -148,8 +149,19 @@ KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
 $(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
 $(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32
 
+KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic
+KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
+KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
+KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
+$(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
+
 $(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
 				 $(obj)/vdso32/vdso32.lds \
+				 $(obj)/vdso32/vclock_gettime.o \
 				 $(obj)/vdso32/note.o \
 				 $(obj)/vdso32/%.o
 	$(call if_changed,vdso)
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index eb5d7a56f8d4..16d686171e9a 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -4,6 +4,9 @@
  *
  * Fast user context implementation of clock_gettime, gettimeofday, and time.
  *
+ * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
+ * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
+ *
  * The code should have no internal unresolved relocations.
  * Check with readelf after changing.
  */
@@ -11,56 +14,55 @@
 /* Disable profiling for userspace code: */
 #define DISABLE_BRANCH_PROFILING
 
-#include <linux/kernel.h>
-#include <linux/posix-timers.h>
-#include <linux/time.h>
-#include <linux/string.h>
-#include <asm/vsyscall.h>
-#include <asm/fixmap.h>
+#include <uapi/linux/time.h>
 #include <asm/vgtod.h>
-#include <asm/timex.h>
 #include <asm/hpet.h>
+#include <asm/vvar.h>
 #include <asm/unistd.h>
-#include <asm/io.h>
-#include <asm/pvclock.h>
+#include <asm/msr.h>
+#include <linux/math64.h>
+#include <linux/time.h>
 
 #define gtod (&VVAR(vsyscall_gtod_data))
 
-notrace static cycle_t vread_tsc(void)
+extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
+extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
+extern time_t __vdso_time(time_t *t);
+
+#ifdef CONFIG_HPET_TIMER
+static inline u32 read_hpet_counter(const volatile void *addr)
 {
-	cycle_t ret;
-	u64 last;
+	return *(const volatile u32 *) (addr + HPET_COUNTER);
+}
+#endif
 
-	/*
-	 * Empirically, a fence (of type that depends on the CPU)
-	 * before rdtsc is enough to ensure that rdtsc is ordered
-	 * with respect to loads.  The various CPU manuals are unclear
-	 * as to whether rdtsc can be reordered with later loads,
-	 * but no one has ever seen it happen.
-	 */
-	rdtsc_barrier();
-	ret = (cycle_t)vget_cycles();
+#ifndef BUILD_VDSO32
 
-	last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+#include <linux/kernel.h>
+#include <asm/vsyscall.h>
+#include <asm/fixmap.h>
+#include <asm/pvclock.h>
 
-	if (likely(ret >= last))
-		return ret;
+static notrace cycle_t vread_hpet(void)
+{
+	return read_hpet_counter((const void *)fix_to_virt(VSYSCALL_HPET));
+}
 
-	/*
-	 * GCC likes to generate cmov here, but this branch is extremely
-	 * predictable (it's just a funciton of time and the likely is
-	 * very likely) and there's a data dependence, so force GCC
-	 * to generate a branch instead.  I don't barrier() because
-	 * we don't actually need a barrier, and if this function
-	 * ever gets inlined it will generate worse code.
-	 */
-	asm volatile ("");
-	return last;
+notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
+{
+	long ret;
+	asm("syscall" : "=a" (ret) :
+	    "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
+	return ret;
 }
 
-static notrace cycle_t vread_hpet(void)
+notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
 {
-	return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER);
+	long ret;
+
+	asm("syscall" : "=a" (ret) :
+	    "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
+	return ret;
 }
 
 #ifdef CONFIG_PARAVIRT_CLOCK
@@ -124,7 +126,7 @@ static notrace cycle_t vread_pvclock(int *mode)
 		*mode = VCLOCK_NONE;
 
 	/* refer to tsc.c read_tsc() comment for rationale */
-	last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+	last = gtod->cycle_last;
 
 	if (likely(ret >= last))
 		return ret;
@@ -133,11 +135,30 @@
 }
 #endif
 
+#else
+
+extern u8 hpet_page
+	__attribute__((visibility("hidden")));
+
+#ifdef CONFIG_HPET_TIMER
+static notrace cycle_t vread_hpet(void)
+{
+	return read_hpet_counter((const void *)(&hpet_page));
+}
+#endif
+
 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
 {
 	long ret;
-	asm("syscall" : "=a" (ret) :
-	    "0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "memory");
+
+	asm(
+		"mov %%ebx, %%edx \n"
+		"mov %2, %%ebx \n"
+		"call VDSO32_vsyscall \n"
+		"mov %%edx, %%ebx \n"
+		: "=a" (ret)
+		: "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
+		: "memory", "edx");
 	return ret;
 }
 
@@ -145,28 +166,79 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
 {
 	long ret;
 
-	asm("syscall" : "=a" (ret) :
-	    "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
+	asm(
+		"mov %%ebx, %%edx \n"
+		"mov %2, %%ebx \n"
+		"call VDSO32_vsyscall \n"
+		"mov %%edx, %%ebx \n"
+		: "=a" (ret)
+		: "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
+		: "memory", "edx");
 	return ret;
 }
 
+#ifdef CONFIG_PARAVIRT_CLOCK
+
+static notrace cycle_t vread_pvclock(int *mode)
+{
+	*mode = VCLOCK_NONE;
+	return 0;
+}
+#endif
+
+#endif
+
+notrace static cycle_t vread_tsc(void)
+{
+	cycle_t ret;
+	u64 last;
+
+	/*
+	 * Empirically, a fence (of type that depends on the CPU)
+	 * before rdtsc is enough to ensure that rdtsc is ordered
+	 * with respect to loads.  The various CPU manuals are unclear
+	 * as to whether rdtsc can be reordered with later loads,
+	 * but no one has ever seen it happen.
+	 */
+	rdtsc_barrier();
+	ret = (cycle_t)__native_read_tsc();
+
+	last = gtod->cycle_last;
+
+	if (likely(ret >= last))
+		return ret;
+
+	/*
+	 * GCC likes to generate cmov here, but this branch is extremely
+	 * predictable (it's just a funciton of time and the likely is
+	 * very likely) and there's a data dependence, so force GCC
+	 * to generate a branch instead.  I don't barrier() because
+	 * we don't actually need a barrier, and if this function
+	 * ever gets inlined it will generate worse code.
+	 */
+	asm volatile ("");
+	return last;
+}
 
 notrace static inline u64 vgetsns(int *mode)
 {
-	long v;
+	u64 v;
 	cycles_t cycles;
-	if (gtod->clock.vclock_mode == VCLOCK_TSC)
+
+	if (gtod->vclock_mode == VCLOCK_TSC)
 		cycles = vread_tsc();
-	else if (gtod->clock.vclock_mode == VCLOCK_HPET)
+#ifdef CONFIG_HPET_TIMER
+	else if (gtod->vclock_mode == VCLOCK_HPET)
 		cycles = vread_hpet();
+#endif
 #ifdef CONFIG_PARAVIRT_CLOCK
-	else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK)
+	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
 		cycles = vread_pvclock(mode);
 #endif
 	else
 		return 0;
-	v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
-	return v * gtod->clock.mult;
+	v = (cycles - gtod->cycle_last) & gtod->mask;
+	return v * gtod->mult;
 }
 
 /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
@@ -176,106 +248,102 @@ notrace static int __always_inline do_realtime(struct timespec *ts)
 	u64 ns;
 	int mode;
 
-	ts->tv_nsec = 0;
 	do {
-		seq = raw_read_seqcount_begin(&gtod->seq);
-		mode = gtod->clock.vclock_mode;
+		seq = gtod_read_begin(gtod);
+		mode = gtod->vclock_mode;
 		ts->tv_sec = gtod->wall_time_sec;
 		ns = gtod->wall_time_snsec;
 		ns += vgetsns(&mode);
-		ns >>= gtod->clock.shift;
-	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+		ns >>= gtod->shift;
+	} while (unlikely(gtod_read_retry(gtod, seq)));
+
+	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+	ts->tv_nsec = ns;
 
-	timespec_add_ns(ts, ns);
 	return mode;
 }
 
-notrace static int do_monotonic(struct timespec *ts)
+notrace static int __always_inline do_monotonic(struct timespec *ts)
 {
 	unsigned long seq;
 	u64 ns;
 	int mode;
 
-	ts->tv_nsec = 0;
 	do {
-		seq = raw_read_seqcount_begin(&gtod->seq);
-		mode = gtod->clock.vclock_mode;
+		seq = gtod_read_begin(gtod);
+		mode = gtod->vclock_mode;
 		ts->tv_sec = gtod->monotonic_time_sec;
 		ns = gtod->monotonic_time_snsec;
 		ns += vgetsns(&mode);
-		ns >>= gtod->clock.shift;
-	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
-	timespec_add_ns(ts, ns);
+		ns >>= gtod->shift;
+	} while (unlikely(gtod_read_retry(gtod, seq)));
+
+	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
+	ts->tv_nsec = ns;
 
 	return mode;
 }
 
-notrace static int do_realtime_coarse(struct timespec *ts)
+notrace static void do_realtime_coarse(struct timespec *ts)
 {
 	unsigned long seq;
 	do {
-		seq = raw_read_seqcount_begin(&gtod->seq);
-		ts->tv_sec = gtod->wall_time_coarse.tv_sec;
-		ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
-	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
-	return 0;
+		seq = gtod_read_begin(gtod);
+		ts->tv_sec = gtod->wall_time_coarse_sec;
+		ts->tv_nsec = gtod->wall_time_coarse_nsec;
+	} while (unlikely(gtod_read_retry(gtod, seq)));
 }
 
-notrace static int do_monotonic_coarse(struct timespec *ts)
+notrace static void do_monotonic_coarse(struct timespec *ts)
 {
 	unsigned long seq;
 	do {
-		seq = raw_read_seqcount_begin(&gtod->seq);
-		ts->tv_sec = gtod->monotonic_time_coarse.tv_sec;
-		ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec;
-	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
-
-	return 0;
+		seq = gtod_read_begin(gtod);
+		ts->tv_sec = gtod->monotonic_time_coarse_sec;
+		ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
+	} while (unlikely(gtod_read_retry(gtod, seq)));
 }
 
 notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 {
-	int ret = VCLOCK_NONE;
-
 	switch (clock) {
 	case CLOCK_REALTIME:
-		ret = do_realtime(ts);
+		if (do_realtime(ts) == VCLOCK_NONE)
+			goto fallback;
 		break;
 	case CLOCK_MONOTONIC:
-		ret = do_monotonic(ts);
+		if (do_monotonic(ts) == VCLOCK_NONE)
+			goto fallback;
 		break;
 	case CLOCK_REALTIME_COARSE:
-		return do_realtime_coarse(ts);
+		do_realtime_coarse(ts);
+		break;
 	case CLOCK_MONOTONIC_COARSE:
-		return do_monotonic_coarse(ts);
+		do_monotonic_coarse(ts);
+		break;
+	default:
+		goto fallback;
 	}
 
-	if (ret == VCLOCK_NONE)
-		return vdso_fallback_gettime(clock, ts);
 	return 0;
+fallback:
+	return vdso_fallback_gettime(clock, ts);
 }
 int clock_gettime(clockid_t, struct timespec *)
 	__attribute__((weak, alias("__vdso_clock_gettime")));
 
 notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
 {
-	long ret = VCLOCK_NONE;
-
 	if (likely(tv != NULL)) {
-		BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
-			     offsetof(struct timespec, tv_nsec) ||
-			     sizeof(*tv) != sizeof(struct timespec));
-		ret = do_realtime((struct timespec *)tv);
+		if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
+			return vdso_fallback_gtod(tv, tz);
 		tv->tv_usec /= 1000;
 	}
 	if (unlikely(tz != NULL)) {
-		/* Avoid memcpy. Some old compilers fail to inline it */
-		tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest;
-		tz->tz_dsttime = gtod->sys_tz.tz_dsttime;
+		tz->tz_minuteswest = gtod->tz_minuteswest;
+		tz->tz_dsttime = gtod->tz_dsttime;
 	}
 
-	if (ret == VCLOCK_NONE)
-		return vdso_fallback_gtod(tv, tz);
 	return 0;
 }
 int gettimeofday(struct timeval *, struct timezone *)
@@ -287,8 +355,8 @@ int gettimeofday(struct timeval *, struct timezone *)
  */
 notrace time_t __vdso_time(time_t *t)
 {
-	/* This is atomic on x86_64 so we don't need any locks. */
-	time_t result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec);
+	/* This is atomic on x86 so we don't need any locks. */
+	time_t result = ACCESS_ONCE(gtod->wall_time_sec);
 
 	if (t)
 		*t = result;
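From userspace nothing changes at the source level: glibc routes clock_gettime() through the vDSO when one is mapped, and the fallback paths above enter the kernel only when the clocksource has no vDSO mode. A minimal caller, handy under strace to check whether the syscall is actually avoided (whether it is depends on the active clocksource):

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
		perror("clock_gettime");
		return 1;
	}
	printf("%ld.%09ld\n", (long)ts.tv_sec, ts.tv_nsec);
	return 0;
}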
diff --git a/arch/x86/vdso/vdso-layout.lds.S b/arch/x86/vdso/vdso-layout.lds.S
index 634a2cf62046..2e263f367b13 100644
--- a/arch/x86/vdso/vdso-layout.lds.S
+++ b/arch/x86/vdso/vdso-layout.lds.S
@@ -6,7 +6,25 @@
 
 SECTIONS
 {
-	. = VDSO_PRELINK + SIZEOF_HEADERS;
+#ifdef BUILD_VDSO32
+#include <asm/vdso32.h>
+
+	.hpet_sect : {
+		hpet_page = . - VDSO_OFFSET(VDSO_HPET_PAGE);
+	} :text :hpet_sect
+
+	.vvar_sect : {
+		vvar = . - VDSO_OFFSET(VDSO_VVAR_PAGE);
+
+	/* Place all vvars at the offsets in asm/vvar.h. */
+#define EMIT_VVAR(name, offset) vvar_ ## name = vvar + offset;
+#define __VVAR_KERNEL_LDS
+#include <asm/vvar.h>
+#undef __VVAR_KERNEL_LDS
+#undef EMIT_VVAR
+	} :text :vvar_sect
+#endif
+	. = SIZEOF_HEADERS;
 
 	.hash : { *(.hash) } :text
 	.gnu.hash : { *(.gnu.hash) }
@@ -44,6 +62,11 @@ SECTIONS
 	. = ALIGN(0x100);
 
 	.text		: { *(.text*) }			:text	=0x90909090
+
+	/DISCARD/ : {
+		*(.discard)
+		*(.discard.*)
+	}
 }
 
 /*
@@ -61,4 +84,8 @@ PHDRS
 	dynamic		PT_DYNAMIC	FLAGS(4);		/* PF_R */
 	note		PT_NOTE		FLAGS(4);		/* PF_R */
 	eh_frame_hdr	PT_GNU_EH_FRAME;
+#ifdef BUILD_VDSO32
+	vvar_sect	PT_NULL		FLAGS(4);		/* PF_R */
+	hpet_sect	PT_NULL		FLAGS(4);		/* PF_R */
+#endif
 }
diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S
index 1e13eb8c9656..be3f23b09af5 100644
--- a/arch/x86/vdso/vdso.S
+++ b/arch/x86/vdso/vdso.S
@@ -1,21 +1,3 @@
-#include <asm/page_types.h>
-#include <linux/linkage.h>
+#include <asm/vdso.h>
 
-__PAGE_ALIGNED_DATA
-
-	.globl vdso_start, vdso_end
-	.align PAGE_SIZE
-vdso_start:
-	.incbin "arch/x86/vdso/vdso.so"
-vdso_end:
-	.align PAGE_SIZE /* extra data here leaks to userspace. */
-
-.previous
-
-	.globl vdso_pages
-	.bss
-	.align 8
-	.type vdso_pages, @object
-vdso_pages:
-	.zero (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE * 8
-	.size vdso_pages, .-vdso_pages
+DEFINE_VDSO_IMAGE(vdso, "arch/x86/vdso/vdso.so")
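The .zero directive inside DEFINE_VDSO_IMAGE reserves one page pointer per image page: (end - start + PAGE_SIZE - 1) / PAGE_SIZE * (BITS_PER_LONG / 8). A quick check of that arithmetic with a made-up image size:

#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096, ptr_size = sizeof(void *);
	unsigned long image_size = 6000;	/* hypothetical vdso.so size */

	unsigned long slots = (image_size + page_size - 1) / page_size;
	printf("%lu page pointers, %lu bytes of .bss\n",
	       slots, slots * ptr_size);	/* 2 pointers */
	return 0;
}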
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index d6bfb876cfb0..00348980a3a6 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -16,6 +16,7 @@
 #include <linux/mm.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 
 #include <asm/cpufeature.h>
 #include <asm/msr.h>
@@ -25,17 +26,14 @@
 #include <asm/tlbflush.h>
 #include <asm/vdso.h>
 #include <asm/proto.h>
-
-enum {
-	VDSO_DISABLED = 0,
-	VDSO_ENABLED = 1,
-	VDSO_COMPAT = 2,
-};
+#include <asm/fixmap.h>
+#include <asm/hpet.h>
+#include <asm/vvar.h>
 
 #ifdef CONFIG_COMPAT_VDSO
-#define VDSO_DEFAULT	VDSO_COMPAT
+#define VDSO_DEFAULT	0
 #else
-#define VDSO_DEFAULT	VDSO_ENABLED
+#define VDSO_DEFAULT	1
 #endif
 
 #ifdef CONFIG_X86_64
@@ -44,13 +42,6 @@ enum {
 #endif
 
 /*
- * This is the difference between the prelinked addresses in the vDSO images
- * and the VDSO_HIGH_BASE address where CONFIG_COMPAT_VDSO places the vDSO
- * in the user address space.
- */
-#define VDSO_ADDR_ADJUST	(VDSO_HIGH_BASE - (unsigned long)VDSO32_PRELINK)
-
-/*
  * Should the kernel map a VDSO page into processes and pass its
  * address down to glibc upon exec()?
  */
@@ -60,6 +51,9 @@ static int __init vdso_setup(char *s)
 {
 	vdso_enabled = simple_strtoul(s, NULL, 0);
 
+	if (vdso_enabled > 1)
+		pr_warn("vdso32 values other than 0 and 1 are no longer allowed; vdso disabled\n");
+
 	return 1;
 }
 
@@ -76,124 +70,8 @@ __setup_param("vdso=", vdso32_setup, vdso_setup, 0); | |||
76 | EXPORT_SYMBOL_GPL(vdso_enabled); | 70 | EXPORT_SYMBOL_GPL(vdso_enabled); |
77 | #endif | 71 | #endif |
78 | 72 | ||
79 | static __init void reloc_symtab(Elf32_Ehdr *ehdr, | 73 | static struct page **vdso32_pages; |
80 | unsigned offset, unsigned size) | 74 | static unsigned vdso32_size; |
81 | { | ||
82 | Elf32_Sym *sym = (void *)ehdr + offset; | ||
83 | unsigned nsym = size / sizeof(*sym); | ||
84 | unsigned i; | ||
85 | |||
86 | for(i = 0; i < nsym; i++, sym++) { | ||
87 | if (sym->st_shndx == SHN_UNDEF || | ||
88 | sym->st_shndx == SHN_ABS) | ||
89 | continue; /* skip */ | ||
90 | |||
91 | if (sym->st_shndx > SHN_LORESERVE) { | ||
92 | printk(KERN_INFO "VDSO: unexpected st_shndx %x\n", | ||
93 | sym->st_shndx); | ||
94 | continue; | ||
95 | } | ||
96 | |||
97 | switch(ELF_ST_TYPE(sym->st_info)) { | ||
98 | case STT_OBJECT: | ||
99 | case STT_FUNC: | ||
100 | case STT_SECTION: | ||
101 | case STT_FILE: | ||
102 | sym->st_value += VDSO_ADDR_ADJUST; | ||
103 | } | ||
104 | } | ||
105 | } | ||
106 | |||
107 | static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset) | ||
108 | { | ||
109 | Elf32_Dyn *dyn = (void *)ehdr + offset; | ||
110 | |||
111 | for(; dyn->d_tag != DT_NULL; dyn++) | ||
112 | switch(dyn->d_tag) { | ||
113 | case DT_PLTGOT: | ||
114 | case DT_HASH: | ||
115 | case DT_STRTAB: | ||
116 | case DT_SYMTAB: | ||
117 | case DT_RELA: | ||
118 | case DT_INIT: | ||
119 | case DT_FINI: | ||
120 | case DT_REL: | ||
121 | case DT_DEBUG: | ||
122 | case DT_JMPREL: | ||
123 | case DT_VERSYM: | ||
124 | case DT_VERDEF: | ||
125 | case DT_VERNEED: | ||
126 | case DT_ADDRRNGLO ... DT_ADDRRNGHI: | ||
127 | /* definitely pointers needing relocation */ | ||
128 | dyn->d_un.d_ptr += VDSO_ADDR_ADJUST; | ||
129 | break; | ||
130 | |||
131 | case DT_ENCODING ... OLD_DT_LOOS-1: | ||
132 | case DT_LOOS ... DT_HIOS-1: | ||
133 | /* Tags above DT_ENCODING are pointers if | ||
134 | they're even */ | ||
135 | if (dyn->d_tag >= DT_ENCODING && | ||
136 | (dyn->d_tag & 1) == 0) | ||
137 | dyn->d_un.d_ptr += VDSO_ADDR_ADJUST; | ||
138 | break; | ||
139 | |||
140 | case DT_VERDEFNUM: | ||
141 | case DT_VERNEEDNUM: | ||
142 | case DT_FLAGS_1: | ||
143 | case DT_RELACOUNT: | ||
144 | case DT_RELCOUNT: | ||
145 | case DT_VALRNGLO ... DT_VALRNGHI: | ||
146 | /* definitely not pointers */ | ||
147 | break; | ||
148 | |||
149 | case OLD_DT_LOOS ... DT_LOOS-1: | ||
150 | case DT_HIOS ... DT_VALRNGLO-1: | ||
151 | default: | ||
152 | if (dyn->d_tag > DT_ENCODING) | ||
153 | printk(KERN_INFO "VDSO: unexpected DT_tag %x\n", | ||
154 | dyn->d_tag); | ||
155 | break; | ||
156 | } | ||
157 | } | ||
158 | |||
159 | static __init void relocate_vdso(Elf32_Ehdr *ehdr) | ||
160 | { | ||
161 | Elf32_Phdr *phdr; | ||
162 | Elf32_Shdr *shdr; | ||
163 | int i; | ||
164 | |||
165 | BUG_ON(memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0 || | ||
166 | !elf_check_arch_ia32(ehdr) || | ||
167 | ehdr->e_type != ET_DYN); | ||
168 | |||
169 | ehdr->e_entry += VDSO_ADDR_ADJUST; | ||
170 | |||
171 | /* rebase phdrs */ | ||
172 | phdr = (void *)ehdr + ehdr->e_phoff; | ||
173 | for (i = 0; i < ehdr->e_phnum; i++) { | ||
174 | phdr[i].p_vaddr += VDSO_ADDR_ADJUST; | ||
175 | |||
176 | /* relocate dynamic stuff */ | ||
177 | if (phdr[i].p_type == PT_DYNAMIC) | ||
178 | reloc_dyn(ehdr, phdr[i].p_offset); | ||
179 | } | ||
180 | |||
181 | /* rebase sections */ | ||
182 | shdr = (void *)ehdr + ehdr->e_shoff; | ||
183 | for(i = 0; i < ehdr->e_shnum; i++) { | ||
184 | if (!(shdr[i].sh_flags & SHF_ALLOC)) | ||
185 | continue; | ||
186 | |||
187 | shdr[i].sh_addr += VDSO_ADDR_ADJUST; | ||
188 | |||
189 | if (shdr[i].sh_type == SHT_SYMTAB || | ||
190 | shdr[i].sh_type == SHT_DYNSYM) | ||
191 | reloc_symtab(ehdr, shdr[i].sh_offset, | ||
192 | shdr[i].sh_size); | ||
193 | } | ||
194 | } | ||
195 | |||
196 | static struct page *vdso32_pages[1]; | ||
197 | 75 | ||
198 | #ifdef CONFIG_X86_64 | 76 | #ifdef CONFIG_X86_64 |
199 | 77 | ||
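Everything removed here existed only to rebase the vDSO image to the old fixed address; with the compat mapping gone, the image stays position-independent and the in-place relocation machinery is dead code. The subtlest piece was reloc_dyn()'s tag classification, which relied on the ELF gABI rule that, from DT_ENCODING upward, even d_tag values carry addresses (d_un.d_ptr) and odd values carry scalars (d_un.d_val). A standalone restatement of that rule:

#include <elf.h>
#include <stdbool.h>

/* gABI convention the removed reloc_dyn() leaned on: for tags at or
 * above DT_ENCODING, even tags hold d_ptr addresses (which would have
 * needed rebasing) and odd tags hold plain d_val scalars. */
static bool dyn_tag_is_pointer(Elf32_Sword tag)
{
	return tag >= DT_ENCODING && (tag & 1) == 0;
}
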
@@ -212,12 +90,6 @@ void syscall32_cpu_init(void) | |||
212 | wrmsrl(MSR_CSTAR, ia32_cstar_target); | 90 | wrmsrl(MSR_CSTAR, ia32_cstar_target); |
213 | } | 91 | } |
214 | 92 | ||
215 | #define compat_uses_vma 1 | ||
216 | |||
217 | static inline void map_compat_vdso(int map) | ||
218 | { | ||
219 | } | ||
220 | |||
221 | #else /* CONFIG_X86_32 */ | 93 | #else /* CONFIG_X86_32 */ |
222 | 94 | ||
223 | #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP)) | 95 | #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP)) |
@@ -241,64 +113,36 @@ void enable_sep_cpu(void) | |||
241 | put_cpu(); | 113 | put_cpu(); |
242 | } | 114 | } |
243 | 115 | ||
244 | static struct vm_area_struct gate_vma; | ||
245 | |||
246 | static int __init gate_vma_init(void) | ||
247 | { | ||
248 | gate_vma.vm_mm = NULL; | ||
249 | gate_vma.vm_start = FIXADDR_USER_START; | ||
250 | gate_vma.vm_end = FIXADDR_USER_END; | ||
251 | gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; | ||
252 | gate_vma.vm_page_prot = __P101; | ||
253 | |||
254 | return 0; | ||
255 | } | ||
256 | |||
257 | #define compat_uses_vma 0 | ||
258 | |||
259 | static void map_compat_vdso(int map) | ||
260 | { | ||
261 | static int vdso_mapped; | ||
262 | |||
263 | if (map == vdso_mapped) | ||
264 | return; | ||
265 | |||
266 | vdso_mapped = map; | ||
267 | |||
268 | __set_fixmap(FIX_VDSO, page_to_pfn(vdso32_pages[0]) << PAGE_SHIFT, | ||
269 | map ? PAGE_READONLY_EXEC : PAGE_NONE); | ||
270 | |||
271 | /* flush stray tlbs */ | ||
272 | flush_tlb_all(); | ||
273 | } | ||
274 | |||
275 | #endif /* CONFIG_X86_64 */ | 116 | #endif /* CONFIG_X86_64 */ |
276 | 117 | ||
277 | int __init sysenter_setup(void) | 118 | int __init sysenter_setup(void) |
278 | { | 119 | { |
279 | void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); | 120 | char *vdso32_start, *vdso32_end; |
280 | const void *vsyscall; | 121 | int npages, i; |
281 | size_t vsyscall_len; | ||
282 | |||
283 | vdso32_pages[0] = virt_to_page(syscall_page); | ||
284 | |||
285 | #ifdef CONFIG_X86_32 | ||
286 | gate_vma_init(); | ||
287 | #endif | ||
288 | 122 | ||
123 | #ifdef CONFIG_COMPAT | ||
289 | if (vdso32_syscall()) { | 124 | if (vdso32_syscall()) { |
290 | vsyscall = &vdso32_syscall_start; | 125 | vdso32_start = vdso32_syscall_start; |
291 | vsyscall_len = &vdso32_syscall_end - &vdso32_syscall_start; | 126 | vdso32_end = vdso32_syscall_end; |
292 | } else if (vdso32_sysenter()){ | 127 | vdso32_pages = vdso32_syscall_pages; |
293 | vsyscall = &vdso32_sysenter_start; | 128 | } else |
294 | vsyscall_len = &vdso32_sysenter_end - &vdso32_sysenter_start; | 129 | #endif |
130 | if (vdso32_sysenter()) { | ||
131 | vdso32_start = vdso32_sysenter_start; | ||
132 | vdso32_end = vdso32_sysenter_end; | ||
133 | vdso32_pages = vdso32_sysenter_pages; | ||
295 | } else { | 134 | } else { |
296 | vsyscall = &vdso32_int80_start; | 135 | vdso32_start = vdso32_int80_start; |
297 | vsyscall_len = &vdso32_int80_end - &vdso32_int80_start; | 136 | vdso32_end = vdso32_int80_end; |
137 | vdso32_pages = vdso32_int80_pages; | ||
298 | } | 138 | } |
299 | 139 | ||
300 | memcpy(syscall_page, vsyscall, vsyscall_len); | 140 | npages = ((vdso32_end - vdso32_start) + PAGE_SIZE - 1) / PAGE_SIZE; |
301 | relocate_vdso(syscall_page); | 141 | vdso32_size = npages << PAGE_SHIFT; |
142 | for (i = 0; i < npages; i++) | ||
143 | vdso32_pages[i] = virt_to_page(vdso32_start + i*PAGE_SIZE); | ||
144 | |||
145 | patch_vdso32(vdso32_start, vdso32_size); | ||
302 | 146 | ||
303 | return 0; | 147 | return 0; |
304 | } | 148 | } |
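The rewritten sysenter_setup() no longer memcpy()s the blob into a zeroed page and relocates it; it simply points vdso32_pages at the pages of whichever prebuilt image was selected. The page-count rounding is ordinary ceiling division, the same thing the kernel's DIV_ROUND_UP() macro spells out; an equivalent, illustrative form:

#include <linux/kernel.h>	/* DIV_ROUND_UP() */
#include <asm/page.h>		/* PAGE_SIZE */

/* Equivalent to the open-coded rounding in the hunk above. */
static unsigned vdso32_npages(const char *start, const char *end)
{
	return DIV_ROUND_UP(end - start, PAGE_SIZE);
}
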
@@ -309,48 +153,73 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) | |||
309 | struct mm_struct *mm = current->mm; | 153 | struct mm_struct *mm = current->mm; |
310 | unsigned long addr; | 154 | unsigned long addr; |
311 | int ret = 0; | 155 | int ret = 0; |
312 | bool compat; | 156 | struct vm_area_struct *vma; |
313 | 157 | ||
314 | #ifdef CONFIG_X86_X32_ABI | 158 | #ifdef CONFIG_X86_X32_ABI |
315 | if (test_thread_flag(TIF_X32)) | 159 | if (test_thread_flag(TIF_X32)) |
316 | return x32_setup_additional_pages(bprm, uses_interp); | 160 | return x32_setup_additional_pages(bprm, uses_interp); |
317 | #endif | 161 | #endif |
318 | 162 | ||
319 | if (vdso_enabled == VDSO_DISABLED) | 163 | if (vdso_enabled != 1) /* Other values all mean "disabled" */ |
320 | return 0; | 164 | return 0; |
321 | 165 | ||
322 | down_write(&mm->mmap_sem); | 166 | down_write(&mm->mmap_sem); |
323 | 167 | ||
324 | /* Test compat mode once here, in case someone | 168 | addr = get_unmapped_area(NULL, 0, vdso32_size + VDSO_OFFSET(VDSO_PREV_PAGES), 0, 0); |
325 | changes it via sysctl */ | 169 | if (IS_ERR_VALUE(addr)) { |
326 | compat = (vdso_enabled == VDSO_COMPAT); | 170 | ret = addr; |
171 | goto up_fail; | ||
172 | } | ||
173 | |||
174 | addr += VDSO_OFFSET(VDSO_PREV_PAGES); | ||
327 | 175 | ||
328 | map_compat_vdso(compat); | 176 | current->mm->context.vdso = (void *)addr; |
329 | 177 | ||
330 | if (compat) | 178 | /* |
331 | addr = VDSO_HIGH_BASE; | 179 | * MAYWRITE to allow gdb to COW and set breakpoints |
332 | else { | 180 | */ |
333 | addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); | 181 | ret = install_special_mapping(mm, |
334 | if (IS_ERR_VALUE(addr)) { | 182 | addr, |
335 | ret = addr; | 183 | vdso32_size, |
336 | goto up_fail; | 184 | VM_READ|VM_EXEC| |
337 | } | 185 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, |
186 | vdso32_pages); | ||
187 | |||
188 | if (ret) | ||
189 | goto up_fail; | ||
190 | |||
191 | vma = _install_special_mapping(mm, | ||
192 | addr - VDSO_OFFSET(VDSO_PREV_PAGES), | ||
193 | VDSO_OFFSET(VDSO_PREV_PAGES), | ||
194 | VM_READ, | ||
195 | NULL); | ||
196 | |||
197 | if (IS_ERR(vma)) { | ||
198 | ret = PTR_ERR(vma); | ||
199 | goto up_fail; | ||
338 | } | 200 | } |
339 | 201 | ||
340 | current->mm->context.vdso = (void *)addr; | 202 | ret = remap_pfn_range(vma, |
203 | addr - VDSO_OFFSET(VDSO_VVAR_PAGE), | ||
204 | __pa_symbol(&__vvar_page) >> PAGE_SHIFT, | ||
205 | PAGE_SIZE, | ||
206 | PAGE_READONLY); | ||
207 | |||
208 | if (ret) | ||
209 | goto up_fail; | ||
341 | 210 | ||
342 | if (compat_uses_vma || !compat) { | 211 | #ifdef CONFIG_HPET_TIMER |
343 | /* | 212 | if (hpet_address) { |
344 | * MAYWRITE to allow gdb to COW and set breakpoints | 213 | ret = io_remap_pfn_range(vma, |
345 | */ | 214 | addr - VDSO_OFFSET(VDSO_HPET_PAGE), |
346 | ret = install_special_mapping(mm, addr, PAGE_SIZE, | 215 | hpet_address >> PAGE_SHIFT, |
347 | VM_READ|VM_EXEC| | 216 | PAGE_SIZE, |
348 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, | 217 | pgprot_noncached(PAGE_READONLY)); |
349 | vdso32_pages); | ||
350 | 218 | ||
351 | if (ret) | 219 | if (ret) |
352 | goto up_fail; | 220 | goto up_fail; |
353 | } | 221 | } |
222 | #endif | ||
354 | 223 | ||
355 | current_thread_info()->sysenter_return = | 224 | current_thread_info()->sysenter_return = |
356 | VDSO32_SYMBOL(addr, SYSENTER_RETURN); | 225 | VDSO32_SYMBOL(addr, SYSENTER_RETURN); |
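Taken together, the new mapping path reserves one contiguous region and carves it up: the vDSO image proper at addr, preceded by a read-only special mapping that hosts the vvar page and, when hpet_address is set, the HPET registers. A sketch of the resulting layout (the VDSO_OFFSET()/VDSO_*_PAGE constants come from asm/vdso.h in this series; the exact page order is illustrative):

/*
 *   addr - VDSO_OFFSET(VDSO_PREV_PAGES)   start of the read-only area
 *   addr - VDSO_OFFSET(VDSO_HPET_PAGE)    HPET registers (if present)
 *   addr - VDSO_OFFSET(VDSO_VVAR_PAGE)    shared vvar page
 *   addr ... addr + vdso32_size           the vDSO image itself
 */
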
@@ -411,20 +280,12 @@ const char *arch_vma_name(struct vm_area_struct *vma) | |||
411 | 280 | ||
412 | struct vm_area_struct *get_gate_vma(struct mm_struct *mm) | 281 | struct vm_area_struct *get_gate_vma(struct mm_struct *mm) |
413 | { | 282 | { |
414 | /* | ||
415 | * Check to see if the corresponding task was created in compat vdso | ||
416 | * mode. | ||
417 | */ | ||
418 | if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE) | ||
419 | return &gate_vma; | ||
420 | return NULL; | 283 | return NULL; |
421 | } | 284 | } |
422 | 285 | ||
423 | int in_gate_area(struct mm_struct *mm, unsigned long addr) | 286 | int in_gate_area(struct mm_struct *mm, unsigned long addr) |
424 | { | 287 | { |
425 | const struct vm_area_struct *vma = get_gate_vma(mm); | 288 | return 0; |
426 | |||
427 | return vma && addr >= vma->vm_start && addr < vma->vm_end; | ||
428 | } | 289 | } |
429 | 290 | ||
430 | int in_gate_area_no_mm(unsigned long addr) | 291 | int in_gate_area_no_mm(unsigned long addr) |
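With get_gate_vma() and in_gate_area() stubbed out, a 32-bit task no longer has a gate area at all; the vDSO appears as an ordinary "[vdso]" VMA at a get_unmapped_area()-chosen address. A quick userspace check:

/* Userspace check: print the vDSO mapping, which after this series is
 * a normal VMA rather than a fixmap gate area at a fixed address. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/self/maps", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		if (strstr(line, "[vdso]"))
			fputs(line, stdout);
	fclose(f);
	return 0;
}
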
diff --git a/arch/x86/vdso/vdso32.S b/arch/x86/vdso/vdso32.S index 2ce5f82c333b..018bcd9f97b4 100644 --- a/arch/x86/vdso/vdso32.S +++ b/arch/x86/vdso/vdso32.S | |||
@@ -1,22 +1,9 @@ | |||
1 | #include <linux/init.h> | 1 | #include <asm/vdso.h> |
2 | 2 | ||
3 | __INITDATA | 3 | DEFINE_VDSO_IMAGE(vdso32_int80, "arch/x86/vdso/vdso32-int80.so") |
4 | 4 | ||
5 | .globl vdso32_int80_start, vdso32_int80_end | ||
6 | vdso32_int80_start: | ||
7 | .incbin "arch/x86/vdso/vdso32-int80.so" | ||
8 | vdso32_int80_end: | ||
9 | |||
10 | .globl vdso32_syscall_start, vdso32_syscall_end | ||
11 | vdso32_syscall_start: | ||
12 | #ifdef CONFIG_COMPAT | 5 | #ifdef CONFIG_COMPAT |
13 | .incbin "arch/x86/vdso/vdso32-syscall.so" | 6 | DEFINE_VDSO_IMAGE(vdso32_syscall, "arch/x86/vdso/vdso32-syscall.so") |
14 | #endif | 7 | #endif |
15 | vdso32_syscall_end: | ||
16 | |||
17 | .globl vdso32_sysenter_start, vdso32_sysenter_end | ||
18 | vdso32_sysenter_start: | ||
19 | .incbin "arch/x86/vdso/vdso32-sysenter.so" | ||
20 | vdso32_sysenter_end: | ||
21 | 8 | ||
22 | __FINIT | 9 | DEFINE_VDSO_IMAGE(vdso32_sysenter, "arch/x86/vdso/vdso32-sysenter.so") |
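DEFINE_VDSO_IMAGE() packages the start/end/.incbin/pages boilerplate that vdso32.S spelled out by hand (the vdsox32.S removal below shows the full pattern it replaces, including the page-pointer array in .bss). Judging from the externs it supplants in the vma.c hunk further down, its C-side counterpart plausibly reduces to:

/* Plausible C-side expansion, inferred from the hand-written externs
 * it replaces in arch/x86/vdso/vma.c; the real macro lives in
 * asm/vdso.h. */
#define DECLARE_VDSO_IMAGE(name)			\
	extern char name##_start[], name##_end[];	\
	extern struct page *name##_pages[]
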
diff --git a/arch/x86/vdso/vdso32/vclock_gettime.c b/arch/x86/vdso/vdso32/vclock_gettime.c new file mode 100644 index 000000000000..175cc72c0f68 --- /dev/null +++ b/arch/x86/vdso/vdso32/vclock_gettime.c | |||
@@ -0,0 +1,30 @@ | |||
1 | #define BUILD_VDSO32 | ||
2 | |||
3 | #ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE | ||
4 | #undef CONFIG_OPTIMIZE_INLINING | ||
5 | #endif | ||
6 | |||
7 | #undef CONFIG_X86_PPRO_FENCE | ||
8 | |||
9 | #ifdef CONFIG_X86_64 | ||
10 | |||
11 | /* | ||
12 | * in case of a 32 bit VDSO for a 64 bit kernel fake a 32 bit kernel | ||
13 | * configuration | ||
14 | */ | ||
15 | #undef CONFIG_64BIT | ||
16 | #undef CONFIG_X86_64 | ||
17 | #undef CONFIG_ILLEGAL_POINTER_VALUE | ||
18 | #undef CONFIG_SPARSEMEM_VMEMMAP | ||
19 | #undef CONFIG_NR_CPUS | ||
20 | |||
21 | #define CONFIG_X86_32 1 | ||
22 | #define CONFIG_PAGE_OFFSET 0 | ||
23 | #define CONFIG_ILLEGAL_POINTER_VALUE 0 | ||
24 | #define CONFIG_NR_CPUS 1 | ||
25 | |||
26 | #define BUILD_VDSO32_64 | ||
27 | |||
28 | #endif | ||
29 | |||
30 | #include "../vclock_gettime.c" | ||
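This new file is the whole trick for sharing the timekeeping code: when the 32-bit vDSO is built inside a 64-bit kernel, it undefines the 64-bit CONFIG symbols, fakes a 32-bit configuration, and then pulls in the common vclock_gettime.c. Userspace needs nothing special; once glibc binds clock_gettime() to the __vdso_clock_gettime export added in the linker script below, a plain call can stay out of the kernel entirely:

/* Plain userspace caller; on a kernel with this series, glibc can
 * service this through the 32-bit vDSO without entering the kernel. */
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0)
		return 1;
	printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
	return 0;
}
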
diff --git a/arch/x86/vdso/vdso32/vdso32.lds.S b/arch/x86/vdso/vdso32/vdso32.lds.S index 976124bb5f92..aadb8b9994cd 100644 --- a/arch/x86/vdso/vdso32/vdso32.lds.S +++ b/arch/x86/vdso/vdso32/vdso32.lds.S | |||
@@ -8,7 +8,11 @@ | |||
8 | * values visible using the asm-x86/vdso.h macros from the kernel proper. | 8 | * values visible using the asm-x86/vdso.h macros from the kernel proper. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <asm/page.h> | ||
12 | |||
13 | #define BUILD_VDSO32 | ||
11 | #define VDSO_PRELINK 0 | 14 | #define VDSO_PRELINK 0 |
15 | |||
12 | #include "../vdso-layout.lds.S" | 16 | #include "../vdso-layout.lds.S" |
13 | 17 | ||
14 | /* The ELF entry point can be used to set the AT_SYSINFO value. */ | 18 | /* The ELF entry point can be used to set the AT_SYSINFO value. */ |
@@ -19,6 +23,13 @@ ENTRY(__kernel_vsyscall); | |||
19 | */ | 23 | */ |
20 | VERSION | 24 | VERSION |
21 | { | 25 | { |
26 | LINUX_2.6 { | ||
27 | global: | ||
28 | __vdso_clock_gettime; | ||
29 | __vdso_gettimeofday; | ||
30 | __vdso_time; | ||
31 | }; | ||
32 | |||
22 | LINUX_2.5 { | 33 | LINUX_2.5 { |
23 | global: | 34 | global: |
24 | __kernel_vsyscall; | 35 | __kernel_vsyscall; |
@@ -31,7 +42,9 @@ VERSION | |||
31 | /* | 42 | /* |
32 | * Symbols we define here called VDSO* get their values into vdso32-syms.h. | 43 | * Symbols we define here called VDSO* get their values into vdso32-syms.h. |
33 | */ | 44 | */ |
34 | VDSO32_PRELINK = VDSO_PRELINK; | ||
35 | VDSO32_vsyscall = __kernel_vsyscall; | 45 | VDSO32_vsyscall = __kernel_vsyscall; |
36 | VDSO32_sigreturn = __kernel_sigreturn; | 46 | VDSO32_sigreturn = __kernel_sigreturn; |
37 | VDSO32_rt_sigreturn = __kernel_rt_sigreturn; | 47 | VDSO32_rt_sigreturn = __kernel_rt_sigreturn; |
48 | VDSO32_clock_gettime = clock_gettime; | ||
49 | VDSO32_gettimeofday = gettimeofday; | ||
50 | VDSO32_time = time; | ||
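The new LINUX_2.6 version node exports the three fast-path symbols alongside the existing LINUX_2.5 entry points. A process locates the image itself through the auxiliary vector, where AT_SYSINFO_EHDR points at the ELF header of the mapping; a sketch for a 32-bit process (getauxval() needs glibc 2.16 or later):

/* Sketch for a 32-bit process: find the vDSO via the auxiliary
 * vector.  Resolving __vdso_clock_gettime from here is ordinary
 * ELF dynamic-symbol parsing. */
#include <elf.h>
#include <stdio.h>
#include <sys/auxv.h>

int main(void)
{
	Elf32_Ehdr *vdso = (Elf32_Ehdr *)getauxval(AT_SYSINFO_EHDR);

	if (!vdso)
		return 1;
	printf("vDSO at %p, ET_DYN: %s\n", (void *)vdso,
	       vdso->e_type == ET_DYN ? "yes" : "no");
	return 0;
}
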
diff --git a/arch/x86/vdso/vdsox32.S b/arch/x86/vdso/vdsox32.S index 295f1c7543d8..f4aa34e7f370 100644 --- a/arch/x86/vdso/vdsox32.S +++ b/arch/x86/vdso/vdsox32.S | |||
@@ -1,21 +1,3 @@ | |||
1 | #include <asm/page_types.h> | 1 | #include <asm/vdso.h> |
2 | #include <linux/linkage.h> | ||
3 | 2 | ||
4 | __PAGE_ALIGNED_DATA | 3 | DEFINE_VDSO_IMAGE(vdsox32, "arch/x86/vdso/vdsox32.so") |
5 | |||
6 | .globl vdsox32_start, vdsox32_end | ||
7 | .align PAGE_SIZE | ||
8 | vdsox32_start: | ||
9 | .incbin "arch/x86/vdso/vdsox32.so" | ||
10 | vdsox32_end: | ||
11 | .align PAGE_SIZE /* extra data here leaks to userspace. */ | ||
12 | |||
13 | .previous | ||
14 | |||
15 | .globl vdsox32_pages | ||
16 | .bss | ||
17 | .align 8 | ||
18 | .type vdsox32_pages, @object | ||
19 | vdsox32_pages: | ||
20 | .zero (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE * 8 | ||
21 | .size vdsox32_pages, .-vdsox32_pages | ||
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 431e87544411..1ad102613127 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c | |||
@@ -16,20 +16,22 @@ | |||
16 | #include <asm/vdso.h> | 16 | #include <asm/vdso.h> |
17 | #include <asm/page.h> | 17 | #include <asm/page.h> |
18 | 18 | ||
19 | #if defined(CONFIG_X86_64) | ||
19 | unsigned int __read_mostly vdso_enabled = 1; | 20 | unsigned int __read_mostly vdso_enabled = 1; |
20 | 21 | ||
21 | extern char vdso_start[], vdso_end[]; | 22 | DECLARE_VDSO_IMAGE(vdso); |
22 | extern unsigned short vdso_sync_cpuid; | 23 | extern unsigned short vdso_sync_cpuid; |
23 | |||
24 | extern struct page *vdso_pages[]; | ||
25 | static unsigned vdso_size; | 24 | static unsigned vdso_size; |
26 | 25 | ||
27 | #ifdef CONFIG_X86_X32_ABI | 26 | #ifdef CONFIG_X86_X32_ABI |
28 | extern char vdsox32_start[], vdsox32_end[]; | 27 | DECLARE_VDSO_IMAGE(vdsox32); |
29 | extern struct page *vdsox32_pages[]; | ||
30 | static unsigned vdsox32_size; | 28 | static unsigned vdsox32_size; |
29 | #endif | ||
30 | #endif | ||
31 | 31 | ||
32 | static void __init patch_vdsox32(void *vdso, size_t len) | 32 | #if defined(CONFIG_X86_32) || defined(CONFIG_X86_X32_ABI) || \ |
33 | defined(CONFIG_COMPAT) | ||
34 | void __init patch_vdso32(void *vdso, size_t len) | ||
33 | { | 35 | { |
34 | Elf32_Ehdr *hdr = vdso; | 36 | Elf32_Ehdr *hdr = vdso; |
35 | Elf32_Shdr *sechdrs, *alt_sec = 0; | 37 | Elf32_Shdr *sechdrs, *alt_sec = 0; |
@@ -52,7 +54,7 @@ static void __init patch_vdsox32(void *vdso, size_t len) | |||
52 | } | 54 | } |
53 | 55 | ||
54 | /* If we get here, it's probably a bug. */ | 56 | /* If we get here, it's probably a bug. */ |
55 | pr_warning("patch_vdsox32: .altinstructions not found\n"); | 57 | pr_warning("patch_vdso32: .altinstructions not found\n"); |
56 | return; /* nothing to patch */ | 58 | return; /* nothing to patch */ |
57 | 59 | ||
58 | found: | 60 | found: |
@@ -61,6 +63,7 @@ found: | |||
61 | } | 63 | } |
62 | #endif | 64 | #endif |
63 | 65 | ||
66 | #if defined(CONFIG_X86_64) | ||
64 | static void __init patch_vdso64(void *vdso, size_t len) | 67 | static void __init patch_vdso64(void *vdso, size_t len) |
65 | { | 68 | { |
66 | Elf64_Ehdr *hdr = vdso; | 69 | Elf64_Ehdr *hdr = vdso; |
@@ -104,7 +107,7 @@ static int __init init_vdso(void) | |||
104 | vdso_pages[i] = virt_to_page(vdso_start + i*PAGE_SIZE); | 107 | vdso_pages[i] = virt_to_page(vdso_start + i*PAGE_SIZE); |
105 | 108 | ||
106 | #ifdef CONFIG_X86_X32_ABI | 109 | #ifdef CONFIG_X86_X32_ABI |
107 | patch_vdsox32(vdsox32_start, vdsox32_end - vdsox32_start); | 110 | patch_vdso32(vdsox32_start, vdsox32_end - vdsox32_start); |
108 | npages = (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE; | 111 | npages = (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE; |
109 | vdsox32_size = npages << PAGE_SHIFT; | 112 | vdsox32_size = npages << PAGE_SHIFT; |
110 | for (i = 0; i < npages; i++) | 113 | for (i = 0; i < npages; i++) |
@@ -204,3 +207,4 @@ static __init int vdso_setup(char *s) | |||
204 | return 0; | 207 | return 0; |
205 | } | 208 | } |
206 | __setup("vdso=", vdso_setup); | 209 | __setup("vdso=", vdso_setup); |
210 | #endif | ||
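The rename from patch_vdsox32 to patch_vdso32 reflects that one patcher now serves the x32, compat, and native 32-bit images alike, since all three are ELFCLASS32 objects. An illustrative sanity check of that shared assumption:

#include <elf.h>
#include <stdbool.h>

/* Illustrative only: every vDSO image that patch_vdso32() handles,
 * x32 included, is an ELFCLASS32 object. */
static bool is_elf32(const void *image)
{
	const Elf32_Ehdr *hdr = image;

	return hdr->e_ident[EI_CLASS] == ELFCLASS32;
}
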
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 2423ef04ffea..86e02eabb640 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -2058,7 +2058,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) | |||
2058 | case FIX_RO_IDT: | 2058 | case FIX_RO_IDT: |
2059 | #ifdef CONFIG_X86_32 | 2059 | #ifdef CONFIG_X86_32 |
2060 | case FIX_WP_TEST: | 2060 | case FIX_WP_TEST: |
2061 | case FIX_VDSO: | ||
2062 | # ifdef CONFIG_HIGHMEM | 2061 | # ifdef CONFIG_HIGHMEM |
2063 | case FIX_KMAP_BEGIN ... FIX_KMAP_END: | 2062 | case FIX_KMAP_BEGIN ... FIX_KMAP_END: |
2064 | # endif | 2063 | # endif |
diff --git a/include/linux/mm.h b/include/linux/mm.h index a0df4295e171..2eec61fe75c9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -1756,6 +1756,9 @@ extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); | |||
1756 | extern struct file *get_mm_exe_file(struct mm_struct *mm); | 1756 | extern struct file *get_mm_exe_file(struct mm_struct *mm); |
1757 | 1757 | ||
1758 | extern int may_expand_vm(struct mm_struct *mm, unsigned long npages); | 1758 | extern int may_expand_vm(struct mm_struct *mm, unsigned long npages); |
1759 | extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm, | ||
1760 | unsigned long addr, unsigned long len, | ||
1761 | unsigned long flags, struct page **pages); | ||
1759 | extern int install_special_mapping(struct mm_struct *mm, | 1762 | extern int install_special_mapping(struct mm_struct *mm, |
1760 | unsigned long addr, unsigned long len, | 1763 | unsigned long addr, unsigned long len, |
1761 | unsigned long flags, struct page **pages); | 1764 | unsigned long flags, struct page **pages); |
diff --git a/mm/mmap.c b/mm/mmap.c --- a/mm/mmap.c +++ b/mm/mmap.c | |||
@@ -2918,7 +2918,7 @@ static const struct vm_operations_struct special_mapping_vmops = { | |||
2918 | * The array pointer and the pages it points to are assumed to stay alive | 2918 | * The array pointer and the pages it points to are assumed to stay alive |
2919 | * for as long as this mapping might exist. | 2919 | * for as long as this mapping might exist. |
2920 | */ | 2920 | */ |
2921 | int install_special_mapping(struct mm_struct *mm, | 2921 | struct vm_area_struct *_install_special_mapping(struct mm_struct *mm, |
2922 | unsigned long addr, unsigned long len, | 2922 | unsigned long addr, unsigned long len, |
2923 | unsigned long vm_flags, struct page **pages) | 2923 | unsigned long vm_flags, struct page **pages) |
2924 | { | 2924 | { |
@@ -2927,7 +2927,7 @@ int install_special_mapping(struct mm_struct *mm, | |||
2927 | 2927 | ||
2928 | vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); | 2928 | vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); |
2929 | if (unlikely(vma == NULL)) | 2929 | if (unlikely(vma == NULL)) |
2930 | return -ENOMEM; | 2930 | return ERR_PTR(-ENOMEM); |
2931 | 2931 | ||
2932 | INIT_LIST_HEAD(&vma->anon_vma_chain); | 2932 | INIT_LIST_HEAD(&vma->anon_vma_chain); |
2933 | vma->vm_mm = mm; | 2933 | vma->vm_mm = mm; |
@@ -2948,11 +2948,23 @@ int install_special_mapping(struct mm_struct *mm, | |||
2948 | 2948 | ||
2949 | perf_event_mmap(vma); | 2949 | perf_event_mmap(vma); |
2950 | 2950 | ||
2951 | return 0; | 2951 | return vma; |
2952 | 2952 | ||
2953 | out: | 2953 | out: |
2954 | kmem_cache_free(vm_area_cachep, vma); | 2954 | kmem_cache_free(vm_area_cachep, vma); |
2955 | return ret; | 2955 | return ERR_PTR(ret); |
2956 | } | ||
2957 | |||
2958 | int install_special_mapping(struct mm_struct *mm, | ||
2959 | unsigned long addr, unsigned long len, | ||
2960 | unsigned long vm_flags, struct page **pages) | ||
2961 | { | ||
2962 | struct vm_area_struct *vma = _install_special_mapping(mm, | ||
2963 | addr, len, vm_flags, pages); | ||
2964 | |||
2965 | if (IS_ERR(vma)) | ||
2966 | return PTR_ERR(vma); | ||
2967 | return 0; | ||
2956 | } | 2968 | } |
2957 | 2969 | ||
2958 | static DEFINE_MUTEX(mm_all_locks_mutex); | 2970 | static DEFINE_MUTEX(mm_all_locks_mutex); |
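The mmap.c split keeps the old int-returning install_special_mapping() as a thin wrapper while letting callers that need the VMA itself (as the vdso32 code above does, to remap_pfn_range() into it) use the new _install_special_mapping(). A hypothetical caller following the new ERR_PTR convention:

/* Hypothetical caller, not from the patch, showing the ERR_PTR-based
 * convention of _install_special_mapping(). */
static int map_special_pages(struct mm_struct *mm, unsigned long addr,
			     unsigned long len, struct page **pages)
{
	struct vm_area_struct *vma;

	vma = _install_special_mapping(mm, addr, len,
				       VM_READ | VM_MAYREAD, pages);
	if (IS_ERR(vma))
		return PTR_ERR(vma);
	return 0;
}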