aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-04-02 15:26:43 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-04-02 15:26:43 -0400
commitc6f21243ce1e8d81ad8361da4d2eaa5947b667c4 (patch)
tree5157ca1de2e7a5371575fff92ce23f0d09e3e7ea
parent9447dc43941cd1c006cae934984524b7c957b803 (diff)
parent37c975545ec63320789962bf307f000f08fabd48 (diff)
Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 vdso changes from Peter Anvin: "This is the revamp of the 32-bit vdso and the associated cleanups. This adds timekeeping support to the 32-bit vdso that we already have in the 64-bit vdso. Although 32-bit x86 is legacy, it is likely to remain in the embedded space for a very long time to come. This removes the traditional COMPAT_VDSO support; the configuration variable is reused for simply removing the 32-bit vdso, which will produce correct results but obviously suffer a performance penalty. Only one beta version of glibc was affected, but that version was unfortunately included in one OpenSUSE release. This is not the end of the vdso cleanups. Stefani and Andy have agreed to continue work for the next kernel cycle; in fact Andy has already produced another set of cleanups that came too late for this cycle. An incidental, but arguably important, change is that this ensures that unused space in the VVAR page is properly zeroed. It wasn't before, and would contain whatever garbage was left in memory by BIOS or the bootloader. 
Since the VVAR page is accessible to user space this had the potential of information leaks" * 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits) x86, vdso: Fix the symbol versions on the 32-bit vDSO x86, vdso, build: Don't rebuild 32-bit vdsos on every make x86, vdso: Actually discard the .discard sections x86, vdso: Fix size of get_unmapped_area() x86, vdso: Finish removing VDSO32_PRELINK x86, vdso: Move more vdso definitions into vdso.h x86: Load the 32-bit vdso in place, just like the 64-bit vdsos x86, vdso32: handle 32 bit vDSO larger one page x86, vdso32: Disable stack protector, adjust optimizations x86, vdso: Zero-pad the VVAR page x86, vdso: Add 32 bit VDSO time support for 64 bit kernel x86, vdso: Add 32 bit VDSO time support for 32 bit kernel x86, vdso: Patch alternatives in the 32-bit VDSO x86, vdso: Introduce VVAR marco for vdso32 x86, vdso: Cleanup __vdso_gettimeofday() x86, vdso: Replace VVAR(vsyscall_gtod_data) by gtod macro x86, vdso: __vdso_clock_gettime() cleanup x86, vdso: Revamp vclock_gettime.c mm: Add new func _install_special_mapping() to mmap.c x86, vdso: Make vsyscall_gtod_data handling x86 generic ...
-rw-r--r--Documentation/kernel-parameters.txt22
-rw-r--r--arch/x86/Kconfig30
-rw-r--r--arch/x86/include/asm/clocksource.h4
-rw-r--r--arch/x86/include/asm/elf.h4
-rw-r--r--arch/x86/include/asm/fixmap.h8
-rw-r--r--arch/x86/include/asm/pgtable_types.h7
-rw-r--r--arch/x86/include/asm/vdso.h52
-rw-r--r--arch/x86/include/asm/vdso32.h11
-rw-r--r--arch/x86/include/asm/vgtod.h71
-rw-r--r--arch/x86/include/asm/vvar.h29
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/tsc.c2
-rw-r--r--arch/x86/kernel/vmlinux.lds.S8
-rw-r--r--arch/x86/kernel/vsyscall_64.c45
-rw-r--r--arch/x86/kernel/vsyscall_gtod.c69
-rw-r--r--arch/x86/tools/relocs.c2
-rw-r--r--arch/x86/vdso/Makefile16
-rw-r--r--arch/x86/vdso/vclock_gettime.c256
-rw-r--r--arch/x86/vdso/vdso-layout.lds.S29
-rw-r--r--arch/x86/vdso/vdso.S22
-rw-r--r--arch/x86/vdso/vdso32-setup.c301
-rw-r--r--arch/x86/vdso/vdso32.S21
-rw-r--r--arch/x86/vdso/vdso32/vclock_gettime.c30
-rw-r--r--arch/x86/vdso/vdso32/vdso32.lds.S15
-rw-r--r--arch/x86/vdso/vdsox32.S22
-rw-r--r--arch/x86/vdso/vma.c20
-rw-r--r--arch/x86/xen/mmu.c1
-rw-r--r--include/linux/mm.h3
-rw-r--r--mm/mmap.c20
30 files changed, 617 insertions, 507 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 121d5fcbd94a..2311dad7a57a 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3424,14 +3424,24 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
3424 of CONFIG_HIGHPTE. 3424 of CONFIG_HIGHPTE.
3425 3425
3426 vdso= [X86,SH] 3426 vdso= [X86,SH]
3427 vdso=2: enable compat VDSO (default with COMPAT_VDSO) 3427 On X86_32, this is an alias for vdso32=. Otherwise:
3428 vdso=1: enable VDSO (default) 3428
3429 vdso=1: enable VDSO (the default)
3429 vdso=0: disable VDSO mapping 3430 vdso=0: disable VDSO mapping
3430 3431
3431 vdso32= [X86] 3432 vdso32= [X86] Control the 32-bit vDSO
3432 vdso32=2: enable compat VDSO (default with COMPAT_VDSO) 3433 vdso32=1: enable 32-bit VDSO
3433 vdso32=1: enable 32-bit VDSO (default) 3434 vdso32=0 or vdso32=2: disable 32-bit VDSO
3434 vdso32=0: disable 32-bit VDSO mapping 3435
3436 See the help text for CONFIG_COMPAT_VDSO for more
3437 details. If CONFIG_COMPAT_VDSO is set, the default is
3438 vdso32=0; otherwise, the default is vdso32=1.
3439
3440 For compatibility with older kernels, vdso32=2 is an
3441 alias for vdso32=0.
3442
3443 Try vdso32=0 if you encounter an error that says:
3444 dl_main: Assertion `(void *) ph->p_vaddr == _rtld_local._dl_sysinfo_dso' failed!
3435 3445
3436 vector= [IA-64,SMP] 3446 vector= [IA-64,SMP]
3437 vector=percpu: enable percpu vector domain 3447 vector=percpu: enable percpu vector domain
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 26237934ac87..ac04d9804391 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -107,9 +107,9 @@ config X86
107 select HAVE_ARCH_SOFT_DIRTY 107 select HAVE_ARCH_SOFT_DIRTY
108 select CLOCKSOURCE_WATCHDOG 108 select CLOCKSOURCE_WATCHDOG
109 select GENERIC_CLOCKEVENTS 109 select GENERIC_CLOCKEVENTS
110 select ARCH_CLOCKSOURCE_DATA if X86_64 110 select ARCH_CLOCKSOURCE_DATA
111 select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC) 111 select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
112 select GENERIC_TIME_VSYSCALL if X86_64 112 select GENERIC_TIME_VSYSCALL
113 select KTIME_SCALAR if X86_32 113 select KTIME_SCALAR if X86_32
114 select GENERIC_STRNCPY_FROM_USER 114 select GENERIC_STRNCPY_FROM_USER
115 select GENERIC_STRNLEN_USER 115 select GENERIC_STRNLEN_USER
@@ -1848,17 +1848,29 @@ config DEBUG_HOTPLUG_CPU0
1848 If unsure, say N. 1848 If unsure, say N.
1849 1849
1850config COMPAT_VDSO 1850config COMPAT_VDSO
1851 def_bool y 1851 def_bool n
1852 prompt "Compat VDSO support" 1852 prompt "Disable the 32-bit vDSO (needed for glibc 2.3.3)"
1853 depends on X86_32 || IA32_EMULATION 1853 depends on X86_32 || IA32_EMULATION
1854 ---help--- 1854 ---help---
1855 Map the 32-bit VDSO to the predictable old-style address too. 1855 Certain buggy versions of glibc will crash if they are
1856 presented with a 32-bit vDSO that is not mapped at the address
1857 indicated in its segment table.
1856 1858
1857 Say N here if you are running a sufficiently recent glibc 1859 The bug was introduced by f866314b89d56845f55e6f365e18b31ec978ec3a
1858 version (2.3.3 or later), to remove the high-mapped 1860 and fixed by 3b3ddb4f7db98ec9e912ccdf54d35df4aa30e04a and
1859 VDSO mapping and to exclusively use the randomized VDSO. 1861 49ad572a70b8aeb91e57483a11dd1b77e31c4468. Glibc 2.3.3 is
1862 the only released version with the bug, but OpenSUSE 9
1863 contains a buggy "glibc 2.3.2".
1860 1864
1861 If unsure, say Y. 1865 The symptom of the bug is that everything crashes on startup, saying:
1866 dl_main: Assertion `(void *) ph->p_vaddr == _rtld_local._dl_sysinfo_dso' failed!
1867
1868 Saying Y here changes the default value of the vdso32 boot
1869 option from 1 to 0, which turns off the 32-bit vDSO entirely.
1870 This works around the glibc bug but hurts performance.
1871
1872 If unsure, say N: if you are compiling your own kernel, you
1873 are unlikely to be using a buggy version of glibc.
1862 1874
1863config CMDLINE_BOOL 1875config CMDLINE_BOOL
1864 bool "Built-in kernel command line" 1876 bool "Built-in kernel command line"
diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h
index 16a57f4ed64d..eda81dc0f4ae 100644
--- a/arch/x86/include/asm/clocksource.h
+++ b/arch/x86/include/asm/clocksource.h
@@ -3,8 +3,6 @@
3#ifndef _ASM_X86_CLOCKSOURCE_H 3#ifndef _ASM_X86_CLOCKSOURCE_H
4#define _ASM_X86_CLOCKSOURCE_H 4#define _ASM_X86_CLOCKSOURCE_H
5 5
6#ifdef CONFIG_X86_64
7
8#define VCLOCK_NONE 0 /* No vDSO clock available. */ 6#define VCLOCK_NONE 0 /* No vDSO clock available. */
9#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ 7#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
10#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */ 8#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
@@ -14,6 +12,4 @@ struct arch_clocksource_data {
14 int vclock_mode; 12 int vclock_mode;
15}; 13};
16 14
17#endif /* CONFIG_X86_64 */
18
19#endif /* _ASM_X86_CLOCKSOURCE_H */ 15#endif /* _ASM_X86_CLOCKSOURCE_H */
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 9c999c1674fa..2c71182d30ef 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -281,16 +281,12 @@ do { \
281 281
282#define STACK_RND_MASK (0x7ff) 282#define STACK_RND_MASK (0x7ff)
283 283
284#define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO))
285
286#define ARCH_DLINFO ARCH_DLINFO_IA32(vdso_enabled) 284#define ARCH_DLINFO ARCH_DLINFO_IA32(vdso_enabled)
287 285
288/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ 286/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
289 287
290#else /* CONFIG_X86_32 */ 288#else /* CONFIG_X86_32 */
291 289
292#define VDSO_HIGH_BASE 0xffffe000U /* CONFIG_COMPAT_VDSO address */
293
294/* 1GB for 64bit, 8MB for 32bit */ 290/* 1GB for 64bit, 8MB for 32bit */
295#define STACK_RND_MASK (test_thread_flag(TIF_ADDR32) ? 0x7ff : 0x3fffff) 291#define STACK_RND_MASK (test_thread_flag(TIF_ADDR32) ? 0x7ff : 0x3fffff)
296 292
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 7252cd339175..2377f5618fb7 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -40,15 +40,8 @@
40 */ 40 */
41extern unsigned long __FIXADDR_TOP; 41extern unsigned long __FIXADDR_TOP;
42#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP) 42#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP)
43
44#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO)
45#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1)
46#else 43#else
47#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE) 44#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE)
48
49/* Only covers 32bit vsyscalls currently. Need another set for 64bit. */
50#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL)
51#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE)
52#endif 45#endif
53 46
54 47
@@ -74,7 +67,6 @@ extern unsigned long __FIXADDR_TOP;
74enum fixed_addresses { 67enum fixed_addresses {
75#ifdef CONFIG_X86_32 68#ifdef CONFIG_X86_32
76 FIX_HOLE, 69 FIX_HOLE,
77 FIX_VDSO,
78#else 70#else
79 VSYSCALL_LAST_PAGE, 71 VSYSCALL_LAST_PAGE,
80 VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE 72 VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 708f19fb4fc7..eb3d44945133 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -214,13 +214,8 @@
214#ifdef CONFIG_X86_64 214#ifdef CONFIG_X86_64
215#define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC 215#define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC
216#else 216#else
217/*
218 * For PDE_IDENT_ATTR include USER bit. As the PDE and PTE protection
219 * bits are combined, this will alow user to access the high address mapped
220 * VDSO in the presence of CONFIG_COMPAT_VDSO
221 */
222#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */ 217#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */
223#define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ 218#define PDE_IDENT_ATTR 0x063 /* PRESENT+RW+DIRTY+ACCESSED */
224#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ 219#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */
225#endif 220#endif
226 221
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index fddb53d63915..d1dc55404ff1 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -1,8 +1,45 @@
1#ifndef _ASM_X86_VDSO_H 1#ifndef _ASM_X86_VDSO_H
2#define _ASM_X86_VDSO_H 2#define _ASM_X86_VDSO_H
3 3
4#include <asm/page_types.h>
5#include <linux/linkage.h>
6
7#ifdef __ASSEMBLER__
8
9#define DEFINE_VDSO_IMAGE(symname, filename) \
10__PAGE_ALIGNED_DATA ; \
11 .globl symname##_start, symname##_end ; \
12 .align PAGE_SIZE ; \
13 symname##_start: ; \
14 .incbin filename ; \
15 symname##_end: ; \
16 .align PAGE_SIZE /* extra data here leaks to userspace. */ ; \
17 \
18.previous ; \
19 \
20 .globl symname##_pages ; \
21 .bss ; \
22 .align 8 ; \
23 .type symname##_pages, @object ; \
24 symname##_pages: ; \
25 .zero (symname##_end - symname##_start + PAGE_SIZE - 1) / PAGE_SIZE * (BITS_PER_LONG / 8) ; \
26 .size symname##_pages, .-symname##_pages
27
28#else
29
30#define DECLARE_VDSO_IMAGE(symname) \
31 extern char symname##_start[], symname##_end[]; \
32 extern struct page *symname##_pages[]
33
4#if defined CONFIG_X86_32 || defined CONFIG_COMPAT 34#if defined CONFIG_X86_32 || defined CONFIG_COMPAT
5extern const char VDSO32_PRELINK[]; 35
36#include <asm/vdso32.h>
37
38DECLARE_VDSO_IMAGE(vdso32_int80);
39#ifdef CONFIG_COMPAT
40DECLARE_VDSO_IMAGE(vdso32_syscall);
41#endif
42DECLARE_VDSO_IMAGE(vdso32_sysenter);
6 43
7/* 44/*
8 * Given a pointer to the vDSO image, find the pointer to VDSO32_name 45 * Given a pointer to the vDSO image, find the pointer to VDSO32_name
@@ -11,8 +48,7 @@ extern const char VDSO32_PRELINK[];
11#define VDSO32_SYMBOL(base, name) \ 48#define VDSO32_SYMBOL(base, name) \
12({ \ 49({ \
13 extern const char VDSO32_##name[]; \ 50 extern const char VDSO32_##name[]; \
14 (void __user *)(VDSO32_##name - VDSO32_PRELINK + \ 51 (void __user *)(VDSO32_##name + (unsigned long)(base)); \
15 (unsigned long)(base)); \
16}) 52})
17#endif 53#endif
18 54
@@ -23,12 +59,8 @@ extern const char VDSO32_PRELINK[];
23extern void __user __kernel_sigreturn; 59extern void __user __kernel_sigreturn;
24extern void __user __kernel_rt_sigreturn; 60extern void __user __kernel_rt_sigreturn;
25 61
26/* 62void __init patch_vdso32(void *vdso, size_t len);
27 * These symbols are defined by vdso32.S to mark the bounds 63
28 * of the ELF DSO images included therein. 64#endif /* __ASSEMBLER__ */
29 */
30extern const char vdso32_int80_start, vdso32_int80_end;
31extern const char vdso32_syscall_start, vdso32_syscall_end;
32extern const char vdso32_sysenter_start, vdso32_sysenter_end;
33 65
34#endif /* _ASM_X86_VDSO_H */ 66#endif /* _ASM_X86_VDSO_H */
diff --git a/arch/x86/include/asm/vdso32.h b/arch/x86/include/asm/vdso32.h
new file mode 100644
index 000000000000..7efb7018406e
--- /dev/null
+++ b/arch/x86/include/asm/vdso32.h
@@ -0,0 +1,11 @@
1#ifndef _ASM_X86_VDSO32_H
2#define _ASM_X86_VDSO32_H
3
4#define VDSO_BASE_PAGE 0
5#define VDSO_VVAR_PAGE 1
6#define VDSO_HPET_PAGE 2
7#define VDSO_PAGES 3
8#define VDSO_PREV_PAGES 2
9#define VDSO_OFFSET(x) ((x) * PAGE_SIZE)
10
11#endif
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 46e24d36b7da..3c3366c2e37f 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -1,30 +1,73 @@
1#ifndef _ASM_X86_VGTOD_H 1#ifndef _ASM_X86_VGTOD_H
2#define _ASM_X86_VGTOD_H 2#define _ASM_X86_VGTOD_H
3 3
4#include <asm/vsyscall.h> 4#include <linux/compiler.h>
5#include <linux/clocksource.h> 5#include <linux/clocksource.h>
6 6
7#ifdef BUILD_VDSO32_64
8typedef u64 gtod_long_t;
9#else
10typedef unsigned long gtod_long_t;
11#endif
12/*
13 * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time
14 * so be carefull by modifying this structure.
15 */
7struct vsyscall_gtod_data { 16struct vsyscall_gtod_data {
8 seqcount_t seq; 17 unsigned seq;
9 18
10 struct { /* extract of a clocksource struct */ 19 int vclock_mode;
11 int vclock_mode; 20 cycle_t cycle_last;
12 cycle_t cycle_last; 21 cycle_t mask;
13 cycle_t mask; 22 u32 mult;
14 u32 mult; 23 u32 shift;
15 u32 shift;
16 } clock;
17 24
18 /* open coded 'struct timespec' */ 25 /* open coded 'struct timespec' */
19 time_t wall_time_sec;
20 u64 wall_time_snsec; 26 u64 wall_time_snsec;
27 gtod_long_t wall_time_sec;
28 gtod_long_t monotonic_time_sec;
21 u64 monotonic_time_snsec; 29 u64 monotonic_time_snsec;
22 time_t monotonic_time_sec; 30 gtod_long_t wall_time_coarse_sec;
31 gtod_long_t wall_time_coarse_nsec;
32 gtod_long_t monotonic_time_coarse_sec;
33 gtod_long_t monotonic_time_coarse_nsec;
23 34
24 struct timezone sys_tz; 35 int tz_minuteswest;
25 struct timespec wall_time_coarse; 36 int tz_dsttime;
26 struct timespec monotonic_time_coarse;
27}; 37};
28extern struct vsyscall_gtod_data vsyscall_gtod_data; 38extern struct vsyscall_gtod_data vsyscall_gtod_data;
29 39
40static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s)
41{
42 unsigned ret;
43
44repeat:
45 ret = ACCESS_ONCE(s->seq);
46 if (unlikely(ret & 1)) {
47 cpu_relax();
48 goto repeat;
49 }
50 smp_rmb();
51 return ret;
52}
53
54static inline int gtod_read_retry(const struct vsyscall_gtod_data *s,
55 unsigned start)
56{
57 smp_rmb();
58 return unlikely(s->seq != start);
59}
60
61static inline void gtod_write_begin(struct vsyscall_gtod_data *s)
62{
63 ++s->seq;
64 smp_wmb();
65}
66
67static inline void gtod_write_end(struct vsyscall_gtod_data *s)
68{
69 smp_wmb();
70 ++s->seq;
71}
72
30#endif /* _ASM_X86_VGTOD_H */ 73#endif /* _ASM_X86_VGTOD_H */
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index d76ac40da206..081d909bc495 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -16,8 +16,8 @@
16 * you mess up, the linker will catch it.) 16 * you mess up, the linker will catch it.)
17 */ 17 */
18 18
19/* Base address of vvars. This is not ABI. */ 19#ifndef _ASM_X86_VVAR_H
20#define VVAR_ADDRESS (-10*1024*1024 - 4096) 20#define _ASM_X86_VVAR_H
21 21
22#if defined(__VVAR_KERNEL_LDS) 22#if defined(__VVAR_KERNEL_LDS)
23 23
@@ -29,16 +29,35 @@
29 29
30#else 30#else
31 31
32#ifdef BUILD_VDSO32
33
34#define DECLARE_VVAR(offset, type, name) \
35 extern type vvar_ ## name __attribute__((visibility("hidden")));
36
37#define VVAR(name) (vvar_ ## name)
38
39#else
40
41extern char __vvar_page;
42
43/* Base address of vvars. This is not ABI. */
44#ifdef CONFIG_X86_64
45#define VVAR_ADDRESS (-10*1024*1024 - 4096)
46#else
47#define VVAR_ADDRESS (&__vvar_page)
48#endif
49
32#define DECLARE_VVAR(offset, type, name) \ 50#define DECLARE_VVAR(offset, type, name) \
33 static type const * const vvaraddr_ ## name = \ 51 static type const * const vvaraddr_ ## name = \
34 (void *)(VVAR_ADDRESS + (offset)); 52 (void *)(VVAR_ADDRESS + (offset));
35 53
54#define VVAR(name) (*vvaraddr_ ## name)
55#endif
56
36#define DEFINE_VVAR(type, name) \ 57#define DEFINE_VVAR(type, name) \
37 type name \ 58 type name \
38 __attribute__((section(".vvar_" #name), aligned(16))) __visible 59 __attribute__((section(".vvar_" #name), aligned(16))) __visible
39 60
40#define VVAR(name) (*vvaraddr_ ## name)
41
42#endif 61#endif
43 62
44/* DECLARE_VVAR(offset, type, name) */ 63/* DECLARE_VVAR(offset, type, name) */
@@ -48,3 +67,5 @@ DECLARE_VVAR(16, int, vgetcpu_mode)
48DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data) 67DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
49 68
50#undef DECLARE_VVAR 69#undef DECLARE_VVAR
70
71#endif
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index cb648c84b327..f4d96000d33a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -26,7 +26,7 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o
26obj-y += probe_roms.o 26obj-y += probe_roms.o
27obj-$(CONFIG_X86_32) += i386_ksyms_32.o 27obj-$(CONFIG_X86_32) += i386_ksyms_32.o
28obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o 28obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
29obj-y += syscall_$(BITS).o 29obj-y += syscall_$(BITS).o vsyscall_gtod.o
30obj-$(CONFIG_X86_64) += vsyscall_64.o 30obj-$(CONFIG_X86_64) += vsyscall_64.o
31obj-$(CONFIG_X86_64) += vsyscall_emu_64.o 31obj-$(CONFIG_X86_64) += vsyscall_emu_64.o
32obj-$(CONFIG_SYSFS) += ksysfs.o 32obj-$(CONFIG_SYSFS) += ksysfs.o
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 014618dbaa7b..93eed15a8fd4 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -752,9 +752,7 @@ static struct clocksource clocksource_hpet = {
752 .mask = HPET_MASK, 752 .mask = HPET_MASK,
753 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 753 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
754 .resume = hpet_resume_counter, 754 .resume = hpet_resume_counter,
755#ifdef CONFIG_X86_64
756 .archdata = { .vclock_mode = VCLOCK_HPET }, 755 .archdata = { .vclock_mode = VCLOCK_HPET },
757#endif
758}; 756};
759 757
760static int hpet_clocksource_register(void) 758static int hpet_clocksource_register(void)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 7a9296ab8834..57e5ce126d5a 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -984,9 +984,7 @@ static struct clocksource clocksource_tsc = {
984 .mask = CLOCKSOURCE_MASK(64), 984 .mask = CLOCKSOURCE_MASK(64),
985 .flags = CLOCK_SOURCE_IS_CONTINUOUS | 985 .flags = CLOCK_SOURCE_IS_CONTINUOUS |
986 CLOCK_SOURCE_MUST_VERIFY, 986 CLOCK_SOURCE_MUST_VERIFY,
987#ifdef CONFIG_X86_64
988 .archdata = { .vclock_mode = VCLOCK_TSC }, 987 .archdata = { .vclock_mode = VCLOCK_TSC },
989#endif
990}; 988};
991 989
992void mark_tsc_unstable(char *reason) 990void mark_tsc_unstable(char *reason)
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index da6b35a98260..49edf2dd3613 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -147,7 +147,6 @@ SECTIONS
147 _edata = .; 147 _edata = .;
148 } :data 148 } :data
149 149
150#ifdef CONFIG_X86_64
151 150
152 . = ALIGN(PAGE_SIZE); 151 . = ALIGN(PAGE_SIZE);
153 __vvar_page = .; 152 __vvar_page = .;
@@ -165,12 +164,15 @@ SECTIONS
165#undef __VVAR_KERNEL_LDS 164#undef __VVAR_KERNEL_LDS
166#undef EMIT_VVAR 165#undef EMIT_VVAR
167 166
167 /*
168 * Pad the rest of the page with zeros. Otherwise the loader
169 * can leave garbage here.
170 */
171 . = __vvar_beginning_hack + PAGE_SIZE;
168 } :data 172 } :data
169 173
170 . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE); 174 . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE);
171 175
172#endif /* CONFIG_X86_64 */
173
174 /* Init code and data - will be freed after init */ 176 /* Init code and data - will be freed after init */
175 . = ALIGN(PAGE_SIZE); 177 . = ALIGN(PAGE_SIZE);
176 .init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) { 178 .init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) {
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 1f96f9347ed9..9ea287666c65 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -47,14 +47,12 @@
47#include <asm/segment.h> 47#include <asm/segment.h>
48#include <asm/desc.h> 48#include <asm/desc.h>
49#include <asm/topology.h> 49#include <asm/topology.h>
50#include <asm/vgtod.h>
51#include <asm/traps.h> 50#include <asm/traps.h>
52 51
53#define CREATE_TRACE_POINTS 52#define CREATE_TRACE_POINTS
54#include "vsyscall_trace.h" 53#include "vsyscall_trace.h"
55 54
56DEFINE_VVAR(int, vgetcpu_mode); 55DEFINE_VVAR(int, vgetcpu_mode);
57DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
58 56
59static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; 57static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
60 58
@@ -77,48 +75,6 @@ static int __init vsyscall_setup(char *str)
77} 75}
78early_param("vsyscall", vsyscall_setup); 76early_param("vsyscall", vsyscall_setup);
79 77
80void update_vsyscall_tz(void)
81{
82 vsyscall_gtod_data.sys_tz = sys_tz;
83}
84
85void update_vsyscall(struct timekeeper *tk)
86{
87 struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
88
89 write_seqcount_begin(&vdata->seq);
90
91 /* copy vsyscall data */
92 vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode;
93 vdata->clock.cycle_last = tk->clock->cycle_last;
94 vdata->clock.mask = tk->clock->mask;
95 vdata->clock.mult = tk->mult;
96 vdata->clock.shift = tk->shift;
97
98 vdata->wall_time_sec = tk->xtime_sec;
99 vdata->wall_time_snsec = tk->xtime_nsec;
100
101 vdata->monotonic_time_sec = tk->xtime_sec
102 + tk->wall_to_monotonic.tv_sec;
103 vdata->monotonic_time_snsec = tk->xtime_nsec
104 + (tk->wall_to_monotonic.tv_nsec
105 << tk->shift);
106 while (vdata->monotonic_time_snsec >=
107 (((u64)NSEC_PER_SEC) << tk->shift)) {
108 vdata->monotonic_time_snsec -=
109 ((u64)NSEC_PER_SEC) << tk->shift;
110 vdata->monotonic_time_sec++;
111 }
112
113 vdata->wall_time_coarse.tv_sec = tk->xtime_sec;
114 vdata->wall_time_coarse.tv_nsec = (long)(tk->xtime_nsec >> tk->shift);
115
116 vdata->monotonic_time_coarse = timespec_add(vdata->wall_time_coarse,
117 tk->wall_to_monotonic);
118
119 write_seqcount_end(&vdata->seq);
120}
121
122static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, 78static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
123 const char *message) 79 const char *message)
124{ 80{
@@ -374,7 +330,6 @@ void __init map_vsyscall(void)
374{ 330{
375 extern char __vsyscall_page; 331 extern char __vsyscall_page;
376 unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); 332 unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
377 extern char __vvar_page;
378 unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page); 333 unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page);
379 334
380 __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall, 335 __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall,
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
new file mode 100644
index 000000000000..f9c6e56e14b5
--- /dev/null
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -0,0 +1,69 @@
1/*
2 * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
3 * Copyright 2003 Andi Kleen, SuSE Labs.
4 *
5 * Modified for x86 32 bit architecture by
6 * Stefani Seibold <stefani@seibold.net>
7 * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
8 *
9 * Thanks to hpa@transmeta.com for some useful hint.
10 * Special thanks to Ingo Molnar for his early experience with
11 * a different vsyscall implementation for Linux/IA32 and for the name.
12 *
13 */
14
15#include <linux/timekeeper_internal.h>
16#include <asm/vgtod.h>
17#include <asm/vvar.h>
18
19DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
20
21void update_vsyscall_tz(void)
22{
23 vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest;
24 vsyscall_gtod_data.tz_dsttime = sys_tz.tz_dsttime;
25}
26
27void update_vsyscall(struct timekeeper *tk)
28{
29 struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
30
31 gtod_write_begin(vdata);
32
33 /* copy vsyscall data */
34 vdata->vclock_mode = tk->clock->archdata.vclock_mode;
35 vdata->cycle_last = tk->clock->cycle_last;
36 vdata->mask = tk->clock->mask;
37 vdata->mult = tk->mult;
38 vdata->shift = tk->shift;
39
40 vdata->wall_time_sec = tk->xtime_sec;
41 vdata->wall_time_snsec = tk->xtime_nsec;
42
43 vdata->monotonic_time_sec = tk->xtime_sec
44 + tk->wall_to_monotonic.tv_sec;
45 vdata->monotonic_time_snsec = tk->xtime_nsec
46 + (tk->wall_to_monotonic.tv_nsec
47 << tk->shift);
48 while (vdata->monotonic_time_snsec >=
49 (((u64)NSEC_PER_SEC) << tk->shift)) {
50 vdata->monotonic_time_snsec -=
51 ((u64)NSEC_PER_SEC) << tk->shift;
52 vdata->monotonic_time_sec++;
53 }
54
55 vdata->wall_time_coarse_sec = tk->xtime_sec;
56 vdata->wall_time_coarse_nsec = (long)(tk->xtime_nsec >> tk->shift);
57
58 vdata->monotonic_time_coarse_sec =
59 vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
60 vdata->monotonic_time_coarse_nsec =
61 vdata->wall_time_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
62
63 while (vdata->monotonic_time_coarse_nsec >= NSEC_PER_SEC) {
64 vdata->monotonic_time_coarse_nsec -= NSEC_PER_SEC;
65 vdata->monotonic_time_coarse_sec++;
66 }
67
68 gtod_write_end(vdata);
69}
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index cfbdbdb4e173..bbb1d2259ecf 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -69,8 +69,8 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = {
69 "__per_cpu_load|" 69 "__per_cpu_load|"
70 "init_per_cpu__.*|" 70 "init_per_cpu__.*|"
71 "__end_rodata_hpage_align|" 71 "__end_rodata_hpage_align|"
72 "__vvar_page|"
73#endif 72#endif
73 "__vvar_page|"
74 "_end)$" 74 "_end)$"
75}; 75};
76 76
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 9206ac7961a5..c580d1210ffe 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -23,7 +23,8 @@ vobjs-$(VDSOX32-y) += $(vobjx32s-compat)
23vobj64s := $(filter-out $(vobjx32s-compat),$(vobjs-y)) 23vobj64s := $(filter-out $(vobjx32s-compat),$(vobjs-y))
24 24
25# files to link into kernel 25# files to link into kernel
26obj-$(VDSO64-y) += vma.o vdso.o 26obj-y += vma.o
27obj-$(VDSO64-y) += vdso.o
27obj-$(VDSOX32-y) += vdsox32.o 28obj-$(VDSOX32-y) += vdsox32.o
28obj-$(VDSO32-y) += vdso32.o vdso32-setup.o 29obj-$(VDSO32-y) += vdso32.o vdso32-setup.o
29 30
@@ -138,7 +139,7 @@ override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
138 139
139targets += vdso32/vdso32.lds 140targets += vdso32/vdso32.lds
140targets += $(vdso32-images) $(vdso32-images:=.dbg) 141targets += $(vdso32-images) $(vdso32-images:=.dbg)
141targets += vdso32/note.o $(vdso32.so-y:%=vdso32/%.o) 142targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o)
142 143
143extra-y += $(vdso32-images) 144extra-y += $(vdso32-images)
144 145
@@ -148,8 +149,19 @@ KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
148$(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) 149$(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
149$(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32 150$(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32
150 151
152KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
153KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32))
154KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
155KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32))
156KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic
157KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
158KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
159KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
160$(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
161
151$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \ 162$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
152 $(obj)/vdso32/vdso32.lds \ 163 $(obj)/vdso32/vdso32.lds \
164 $(obj)/vdso32/vclock_gettime.o \
153 $(obj)/vdso32/note.o \ 165 $(obj)/vdso32/note.o \
154 $(obj)/vdso32/%.o 166 $(obj)/vdso32/%.o
155 $(call if_changed,vdso) 167 $(call if_changed,vdso)
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index eb5d7a56f8d4..16d686171e9a 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -4,6 +4,9 @@
4 * 4 *
5 * Fast user context implementation of clock_gettime, gettimeofday, and time. 5 * Fast user context implementation of clock_gettime, gettimeofday, and time.
6 * 6 *
7 * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
8 * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
9 *
7 * The code should have no internal unresolved relocations. 10 * The code should have no internal unresolved relocations.
8 * Check with readelf after changing. 11 * Check with readelf after changing.
9 */ 12 */
@@ -11,56 +14,55 @@
11/* Disable profiling for userspace code: */ 14/* Disable profiling for userspace code: */
12#define DISABLE_BRANCH_PROFILING 15#define DISABLE_BRANCH_PROFILING
13 16
14#include <linux/kernel.h> 17#include <uapi/linux/time.h>
15#include <linux/posix-timers.h>
16#include <linux/time.h>
17#include <linux/string.h>
18#include <asm/vsyscall.h>
19#include <asm/fixmap.h>
20#include <asm/vgtod.h> 18#include <asm/vgtod.h>
21#include <asm/timex.h>
22#include <asm/hpet.h> 19#include <asm/hpet.h>
20#include <asm/vvar.h>
23#include <asm/unistd.h> 21#include <asm/unistd.h>
24#include <asm/io.h> 22#include <asm/msr.h>
25#include <asm/pvclock.h> 23#include <linux/math64.h>
24#include <linux/time.h>
26 25
27#define gtod (&VVAR(vsyscall_gtod_data)) 26#define gtod (&VVAR(vsyscall_gtod_data))
28 27
29notrace static cycle_t vread_tsc(void) 28extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
29extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
30extern time_t __vdso_time(time_t *t);
31
32#ifdef CONFIG_HPET_TIMER
33static inline u32 read_hpet_counter(const volatile void *addr)
30{ 34{
31 cycle_t ret; 35 return *(const volatile u32 *) (addr + HPET_COUNTER);
32 u64 last; 36}
37#endif
33 38
34 /* 39#ifndef BUILD_VDSO32
35 * Empirically, a fence (of type that depends on the CPU)
36 * before rdtsc is enough to ensure that rdtsc is ordered
37 * with respect to loads. The various CPU manuals are unclear
38 * as to whether rdtsc can be reordered with later loads,
39 * but no one has ever seen it happen.
40 */
41 rdtsc_barrier();
42 ret = (cycle_t)vget_cycles();
43 40
44 last = VVAR(vsyscall_gtod_data).clock.cycle_last; 41#include <linux/kernel.h>
42#include <asm/vsyscall.h>
43#include <asm/fixmap.h>
44#include <asm/pvclock.h>
45 45
46 if (likely(ret >= last)) 46static notrace cycle_t vread_hpet(void)
47 return ret; 47{
48 return read_hpet_counter((const void *)fix_to_virt(VSYSCALL_HPET));
49}
48 50
49 /* 51notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
50 * GCC likes to generate cmov here, but this branch is extremely 52{
51 * predictable (it's just a function of time and the likely is 53 long ret;
52 * very likely) and there's a data dependence, so force GCC 54 asm("syscall" : "=a" (ret) :
53 * to generate a branch instead. I don't barrier() because 55 "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
54 * we don't actually need a barrier, and if this function 56 return ret;
55 * ever gets inlined it will generate worse code.
56 */
57 asm volatile ("");
58 return last;
59} 57}
60 58
61static notrace cycle_t vread_hpet(void) 59notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
62{ 60{
63 return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER); 61 long ret;
62
63 asm("syscall" : "=a" (ret) :
64 "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
65 return ret;
64} 66}
65 67
66#ifdef CONFIG_PARAVIRT_CLOCK 68#ifdef CONFIG_PARAVIRT_CLOCK
@@ -124,7 +126,7 @@ static notrace cycle_t vread_pvclock(int *mode)
124 *mode = VCLOCK_NONE; 126 *mode = VCLOCK_NONE;
125 127
126 /* refer to tsc.c read_tsc() comment for rationale */ 128 /* refer to tsc.c read_tsc() comment for rationale */
127 last = VVAR(vsyscall_gtod_data).clock.cycle_last; 129 last = gtod->cycle_last;
128 130
129 if (likely(ret >= last)) 131 if (likely(ret >= last))
130 return ret; 132 return ret;
@@ -133,11 +135,30 @@ static notrace cycle_t vread_pvclock(int *mode)
133} 135}
134#endif 136#endif
135 137
138#else
139
140extern u8 hpet_page
141 __attribute__((visibility("hidden")));
142
143#ifdef CONFIG_HPET_TIMER
144static notrace cycle_t vread_hpet(void)
145{
146 return read_hpet_counter((const void *)(&hpet_page));
147}
148#endif
149
136notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) 150notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
137{ 151{
138 long ret; 152 long ret;
139 asm("syscall" : "=a" (ret) : 153
140 "0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "memory"); 154 asm(
155 "mov %%ebx, %%edx \n"
156 "mov %2, %%ebx \n"
157 "call VDSO32_vsyscall \n"
158 "mov %%edx, %%ebx \n"
159 : "=a" (ret)
160 : "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
161 : "memory", "edx");
141 return ret; 162 return ret;
142} 163}
143 164
@@ -145,28 +166,79 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
145{ 166{
146 long ret; 167 long ret;
147 168
148 asm("syscall" : "=a" (ret) : 169 asm(
149 "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); 170 "mov %%ebx, %%edx \n"
171 "mov %2, %%ebx \n"
172 "call VDSO32_vsyscall \n"
173 "mov %%edx, %%ebx \n"
174 : "=a" (ret)
175 : "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
176 : "memory", "edx");
150 return ret; 177 return ret;
151} 178}
152 179
180#ifdef CONFIG_PARAVIRT_CLOCK
181
182static notrace cycle_t vread_pvclock(int *mode)
183{
184 *mode = VCLOCK_NONE;
185 return 0;
186}
187#endif
188
189#endif
190
191notrace static cycle_t vread_tsc(void)
192{
193 cycle_t ret;
194 u64 last;
195
196 /*
197 * Empirically, a fence (of type that depends on the CPU)
198 * before rdtsc is enough to ensure that rdtsc is ordered
199 * with respect to loads. The various CPU manuals are unclear
200 * as to whether rdtsc can be reordered with later loads,
201 * but no one has ever seen it happen.
202 */
203 rdtsc_barrier();
204 ret = (cycle_t)__native_read_tsc();
205
206 last = gtod->cycle_last;
207
208 if (likely(ret >= last))
209 return ret;
210
211 /*
212 * GCC likes to generate cmov here, but this branch is extremely
213 * predictable (it's just a function of time and the likely is
214 * very likely) and there's a data dependence, so force GCC
215 * to generate a branch instead. I don't barrier() because
216 * we don't actually need a barrier, and if this function
217 * ever gets inlined it will generate worse code.
218 */
219 asm volatile ("");
220 return last;
221}
153 222
154notrace static inline u64 vgetsns(int *mode) 223notrace static inline u64 vgetsns(int *mode)
155{ 224{
156 long v; 225 u64 v;
157 cycles_t cycles; 226 cycles_t cycles;
158 if (gtod->clock.vclock_mode == VCLOCK_TSC) 227
228 if (gtod->vclock_mode == VCLOCK_TSC)
159 cycles = vread_tsc(); 229 cycles = vread_tsc();
160 else if (gtod->clock.vclock_mode == VCLOCK_HPET) 230#ifdef CONFIG_HPET_TIMER
231 else if (gtod->vclock_mode == VCLOCK_HPET)
161 cycles = vread_hpet(); 232 cycles = vread_hpet();
233#endif
162#ifdef CONFIG_PARAVIRT_CLOCK 234#ifdef CONFIG_PARAVIRT_CLOCK
163 else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK) 235 else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
164 cycles = vread_pvclock(mode); 236 cycles = vread_pvclock(mode);
165#endif 237#endif
166 else 238 else
167 return 0; 239 return 0;
168 v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask; 240 v = (cycles - gtod->cycle_last) & gtod->mask;
169 return v * gtod->clock.mult; 241 return v * gtod->mult;
170} 242}
171 243
172/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */ 244/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
@@ -176,106 +248,102 @@ notrace static int __always_inline do_realtime(struct timespec *ts)
176 u64 ns; 248 u64 ns;
177 int mode; 249 int mode;
178 250
179 ts->tv_nsec = 0;
180 do { 251 do {
181 seq = raw_read_seqcount_begin(&gtod->seq); 252 seq = gtod_read_begin(gtod);
182 mode = gtod->clock.vclock_mode; 253 mode = gtod->vclock_mode;
183 ts->tv_sec = gtod->wall_time_sec; 254 ts->tv_sec = gtod->wall_time_sec;
184 ns = gtod->wall_time_snsec; 255 ns = gtod->wall_time_snsec;
185 ns += vgetsns(&mode); 256 ns += vgetsns(&mode);
186 ns >>= gtod->clock.shift; 257 ns >>= gtod->shift;
187 } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); 258 } while (unlikely(gtod_read_retry(gtod, seq)));
259
260 ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
261 ts->tv_nsec = ns;
188 262
189 timespec_add_ns(ts, ns);
190 return mode; 263 return mode;
191} 264}
192 265
193notrace static int do_monotonic(struct timespec *ts) 266notrace static int __always_inline do_monotonic(struct timespec *ts)
194{ 267{
195 unsigned long seq; 268 unsigned long seq;
196 u64 ns; 269 u64 ns;
197 int mode; 270 int mode;
198 271
199 ts->tv_nsec = 0;
200 do { 272 do {
201 seq = raw_read_seqcount_begin(&gtod->seq); 273 seq = gtod_read_begin(gtod);
202 mode = gtod->clock.vclock_mode; 274 mode = gtod->vclock_mode;
203 ts->tv_sec = gtod->monotonic_time_sec; 275 ts->tv_sec = gtod->monotonic_time_sec;
204 ns = gtod->monotonic_time_snsec; 276 ns = gtod->monotonic_time_snsec;
205 ns += vgetsns(&mode); 277 ns += vgetsns(&mode);
206 ns >>= gtod->clock.shift; 278 ns >>= gtod->shift;
207 } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); 279 } while (unlikely(gtod_read_retry(gtod, seq)));
208 timespec_add_ns(ts, ns); 280
281 ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
282 ts->tv_nsec = ns;
209 283
210 return mode; 284 return mode;
211} 285}
212 286
213notrace static int do_realtime_coarse(struct timespec *ts) 287notrace static void do_realtime_coarse(struct timespec *ts)
214{ 288{
215 unsigned long seq; 289 unsigned long seq;
216 do { 290 do {
217 seq = raw_read_seqcount_begin(&gtod->seq); 291 seq = gtod_read_begin(gtod);
218 ts->tv_sec = gtod->wall_time_coarse.tv_sec; 292 ts->tv_sec = gtod->wall_time_coarse_sec;
219 ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; 293 ts->tv_nsec = gtod->wall_time_coarse_nsec;
220 } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); 294 } while (unlikely(gtod_read_retry(gtod, seq)));
221 return 0;
222} 295}
223 296
224notrace static int do_monotonic_coarse(struct timespec *ts) 297notrace static void do_monotonic_coarse(struct timespec *ts)
225{ 298{
226 unsigned long seq; 299 unsigned long seq;
227 do { 300 do {
228 seq = raw_read_seqcount_begin(&gtod->seq); 301 seq = gtod_read_begin(gtod);
229 ts->tv_sec = gtod->monotonic_time_coarse.tv_sec; 302 ts->tv_sec = gtod->monotonic_time_coarse_sec;
230 ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec; 303 ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
231 } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); 304 } while (unlikely(gtod_read_retry(gtod, seq)));
232
233 return 0;
234} 305}
235 306
236notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) 307notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
237{ 308{
238 int ret = VCLOCK_NONE;
239
240 switch (clock) { 309 switch (clock) {
241 case CLOCK_REALTIME: 310 case CLOCK_REALTIME:
242 ret = do_realtime(ts); 311 if (do_realtime(ts) == VCLOCK_NONE)
312 goto fallback;
243 break; 313 break;
244 case CLOCK_MONOTONIC: 314 case CLOCK_MONOTONIC:
245 ret = do_monotonic(ts); 315 if (do_monotonic(ts) == VCLOCK_NONE)
316 goto fallback;
246 break; 317 break;
247 case CLOCK_REALTIME_COARSE: 318 case CLOCK_REALTIME_COARSE:
248 return do_realtime_coarse(ts); 319 do_realtime_coarse(ts);
320 break;
249 case CLOCK_MONOTONIC_COARSE: 321 case CLOCK_MONOTONIC_COARSE:
250 return do_monotonic_coarse(ts); 322 do_monotonic_coarse(ts);
323 break;
324 default:
325 goto fallback;
251 } 326 }
252 327
253 if (ret == VCLOCK_NONE)
254 return vdso_fallback_gettime(clock, ts);
255 return 0; 328 return 0;
329fallback:
330 return vdso_fallback_gettime(clock, ts);
256} 331}
257int clock_gettime(clockid_t, struct timespec *) 332int clock_gettime(clockid_t, struct timespec *)
258 __attribute__((weak, alias("__vdso_clock_gettime"))); 333 __attribute__((weak, alias("__vdso_clock_gettime")));
259 334
260notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) 335notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
261{ 336{
262 long ret = VCLOCK_NONE;
263
264 if (likely(tv != NULL)) { 337 if (likely(tv != NULL)) {
265 BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != 338 if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
266 offsetof(struct timespec, tv_nsec) || 339 return vdso_fallback_gtod(tv, tz);
267 sizeof(*tv) != sizeof(struct timespec));
268 ret = do_realtime((struct timespec *)tv);
269 tv->tv_usec /= 1000; 340 tv->tv_usec /= 1000;
270 } 341 }
271 if (unlikely(tz != NULL)) { 342 if (unlikely(tz != NULL)) {
272 /* Avoid memcpy. Some old compilers fail to inline it */ 343 tz->tz_minuteswest = gtod->tz_minuteswest;
273 tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest; 344 tz->tz_dsttime = gtod->tz_dsttime;
274 tz->tz_dsttime = gtod->sys_tz.tz_dsttime;
275 } 345 }
276 346
277 if (ret == VCLOCK_NONE)
278 return vdso_fallback_gtod(tv, tz);
279 return 0; 347 return 0;
280} 348}
281int gettimeofday(struct timeval *, struct timezone *) 349int gettimeofday(struct timeval *, struct timezone *)
@@ -287,8 +355,8 @@ int gettimeofday(struct timeval *, struct timezone *)
287 */ 355 */
288notrace time_t __vdso_time(time_t *t) 356notrace time_t __vdso_time(time_t *t)
289{ 357{
290 /* This is atomic on x86_64 so we don't need any locks. */ 358 /* This is atomic on x86 so we don't need any locks. */
291 time_t result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec); 359 time_t result = ACCESS_ONCE(gtod->wall_time_sec);
292 360
293 if (t) 361 if (t)
294 *t = result; 362 *t = result;
diff --git a/arch/x86/vdso/vdso-layout.lds.S b/arch/x86/vdso/vdso-layout.lds.S
index 634a2cf62046..2e263f367b13 100644
--- a/arch/x86/vdso/vdso-layout.lds.S
+++ b/arch/x86/vdso/vdso-layout.lds.S
@@ -6,7 +6,25 @@
6 6
7SECTIONS 7SECTIONS
8{ 8{
9 . = VDSO_PRELINK + SIZEOF_HEADERS; 9#ifdef BUILD_VDSO32
10#include <asm/vdso32.h>
11
12 .hpet_sect : {
13 hpet_page = . - VDSO_OFFSET(VDSO_HPET_PAGE);
14 } :text :hpet_sect
15
16 .vvar_sect : {
17 vvar = . - VDSO_OFFSET(VDSO_VVAR_PAGE);
18
19 /* Place all vvars at the offsets in asm/vvar.h. */
20#define EMIT_VVAR(name, offset) vvar_ ## name = vvar + offset;
21#define __VVAR_KERNEL_LDS
22#include <asm/vvar.h>
23#undef __VVAR_KERNEL_LDS
24#undef EMIT_VVAR
25 } :text :vvar_sect
26#endif
27 . = SIZEOF_HEADERS;
10 28
11 .hash : { *(.hash) } :text 29 .hash : { *(.hash) } :text
12 .gnu.hash : { *(.gnu.hash) } 30 .gnu.hash : { *(.gnu.hash) }
@@ -44,6 +62,11 @@ SECTIONS
44 . = ALIGN(0x100); 62 . = ALIGN(0x100);
45 63
46 .text : { *(.text*) } :text =0x90909090 64 .text : { *(.text*) } :text =0x90909090
65
66 /DISCARD/ : {
67 *(.discard)
68 *(.discard.*)
69 }
47} 70}
48 71
49/* 72/*
@@ -61,4 +84,8 @@ PHDRS
61 dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ 84 dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
62 note PT_NOTE FLAGS(4); /* PF_R */ 85 note PT_NOTE FLAGS(4); /* PF_R */
63 eh_frame_hdr PT_GNU_EH_FRAME; 86 eh_frame_hdr PT_GNU_EH_FRAME;
87#ifdef BUILD_VDSO32
88 vvar_sect PT_NULL FLAGS(4); /* PF_R */
89 hpet_sect PT_NULL FLAGS(4); /* PF_R */
90#endif
64} 91}
diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S
index 1e13eb8c9656..be3f23b09af5 100644
--- a/arch/x86/vdso/vdso.S
+++ b/arch/x86/vdso/vdso.S
@@ -1,21 +1,3 @@
1#include <asm/page_types.h> 1#include <asm/vdso.h>
2#include <linux/linkage.h>
3 2
4__PAGE_ALIGNED_DATA 3DEFINE_VDSO_IMAGE(vdso, "arch/x86/vdso/vdso.so")
5
6 .globl vdso_start, vdso_end
7 .align PAGE_SIZE
8vdso_start:
9 .incbin "arch/x86/vdso/vdso.so"
10vdso_end:
11 .align PAGE_SIZE /* extra data here leaks to userspace. */
12
13.previous
14
15 .globl vdso_pages
16 .bss
17 .align 8
18 .type vdso_pages, @object
19vdso_pages:
20 .zero (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE * 8
21 .size vdso_pages, .-vdso_pages
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index d6bfb876cfb0..00348980a3a6 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -16,6 +16,7 @@
16#include <linux/mm.h> 16#include <linux/mm.h>
17#include <linux/err.h> 17#include <linux/err.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/slab.h>
19 20
20#include <asm/cpufeature.h> 21#include <asm/cpufeature.h>
21#include <asm/msr.h> 22#include <asm/msr.h>
@@ -25,17 +26,14 @@
25#include <asm/tlbflush.h> 26#include <asm/tlbflush.h>
26#include <asm/vdso.h> 27#include <asm/vdso.h>
27#include <asm/proto.h> 28#include <asm/proto.h>
28 29#include <asm/fixmap.h>
29enum { 30#include <asm/hpet.h>
30 VDSO_DISABLED = 0, 31#include <asm/vvar.h>
31 VDSO_ENABLED = 1,
32 VDSO_COMPAT = 2,
33};
34 32
35#ifdef CONFIG_COMPAT_VDSO 33#ifdef CONFIG_COMPAT_VDSO
36#define VDSO_DEFAULT VDSO_COMPAT 34#define VDSO_DEFAULT 0
37#else 35#else
38#define VDSO_DEFAULT VDSO_ENABLED 36#define VDSO_DEFAULT 1
39#endif 37#endif
40 38
41#ifdef CONFIG_X86_64 39#ifdef CONFIG_X86_64
@@ -44,13 +42,6 @@ enum {
44#endif 42#endif
45 43
46/* 44/*
47 * This is the difference between the prelinked addresses in the vDSO images
48 * and the VDSO_HIGH_BASE address where CONFIG_COMPAT_VDSO places the vDSO
49 * in the user address space.
50 */
51#define VDSO_ADDR_ADJUST (VDSO_HIGH_BASE - (unsigned long)VDSO32_PRELINK)
52
53/*
54 * Should the kernel map a VDSO page into processes and pass its 45 * Should the kernel map a VDSO page into processes and pass its
55 * address down to glibc upon exec()? 46 * address down to glibc upon exec()?
56 */ 47 */
@@ -60,6 +51,9 @@ static int __init vdso_setup(char *s)
60{ 51{
61 vdso_enabled = simple_strtoul(s, NULL, 0); 52 vdso_enabled = simple_strtoul(s, NULL, 0);
62 53
54 if (vdso_enabled > 1)
55 pr_warn("vdso32 values other than 0 and 1 are no longer allowed; vdso disabled\n");
56
63 return 1; 57 return 1;
64} 58}
65 59
@@ -76,124 +70,8 @@ __setup_param("vdso=", vdso32_setup, vdso_setup, 0);
76EXPORT_SYMBOL_GPL(vdso_enabled); 70EXPORT_SYMBOL_GPL(vdso_enabled);
77#endif 71#endif
78 72
79static __init void reloc_symtab(Elf32_Ehdr *ehdr, 73static struct page **vdso32_pages;
80 unsigned offset, unsigned size) 74static unsigned vdso32_size;
81{
82 Elf32_Sym *sym = (void *)ehdr + offset;
83 unsigned nsym = size / sizeof(*sym);
84 unsigned i;
85
86 for(i = 0; i < nsym; i++, sym++) {
87 if (sym->st_shndx == SHN_UNDEF ||
88 sym->st_shndx == SHN_ABS)
89 continue; /* skip */
90
91 if (sym->st_shndx > SHN_LORESERVE) {
92 printk(KERN_INFO "VDSO: unexpected st_shndx %x\n",
93 sym->st_shndx);
94 continue;
95 }
96
97 switch(ELF_ST_TYPE(sym->st_info)) {
98 case STT_OBJECT:
99 case STT_FUNC:
100 case STT_SECTION:
101 case STT_FILE:
102 sym->st_value += VDSO_ADDR_ADJUST;
103 }
104 }
105}
106
107static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset)
108{
109 Elf32_Dyn *dyn = (void *)ehdr + offset;
110
111 for(; dyn->d_tag != DT_NULL; dyn++)
112 switch(dyn->d_tag) {
113 case DT_PLTGOT:
114 case DT_HASH:
115 case DT_STRTAB:
116 case DT_SYMTAB:
117 case DT_RELA:
118 case DT_INIT:
119 case DT_FINI:
120 case DT_REL:
121 case DT_DEBUG:
122 case DT_JMPREL:
123 case DT_VERSYM:
124 case DT_VERDEF:
125 case DT_VERNEED:
126 case DT_ADDRRNGLO ... DT_ADDRRNGHI:
127 /* definitely pointers needing relocation */
128 dyn->d_un.d_ptr += VDSO_ADDR_ADJUST;
129 break;
130
131 case DT_ENCODING ... OLD_DT_LOOS-1:
132 case DT_LOOS ... DT_HIOS-1:
133 /* Tags above DT_ENCODING are pointers if
134 they're even */
135 if (dyn->d_tag >= DT_ENCODING &&
136 (dyn->d_tag & 1) == 0)
137 dyn->d_un.d_ptr += VDSO_ADDR_ADJUST;
138 break;
139
140 case DT_VERDEFNUM:
141 case DT_VERNEEDNUM:
142 case DT_FLAGS_1:
143 case DT_RELACOUNT:
144 case DT_RELCOUNT:
145 case DT_VALRNGLO ... DT_VALRNGHI:
146 /* definitely not pointers */
147 break;
148
149 case OLD_DT_LOOS ... DT_LOOS-1:
150 case DT_HIOS ... DT_VALRNGLO-1:
151 default:
152 if (dyn->d_tag > DT_ENCODING)
153 printk(KERN_INFO "VDSO: unexpected DT_tag %x\n",
154 dyn->d_tag);
155 break;
156 }
157}
158
159static __init void relocate_vdso(Elf32_Ehdr *ehdr)
160{
161 Elf32_Phdr *phdr;
162 Elf32_Shdr *shdr;
163 int i;
164
165 BUG_ON(memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0 ||
166 !elf_check_arch_ia32(ehdr) ||
167 ehdr->e_type != ET_DYN);
168
169 ehdr->e_entry += VDSO_ADDR_ADJUST;
170
171 /* rebase phdrs */
172 phdr = (void *)ehdr + ehdr->e_phoff;
173 for (i = 0; i < ehdr->e_phnum; i++) {
174 phdr[i].p_vaddr += VDSO_ADDR_ADJUST;
175
176 /* relocate dynamic stuff */
177 if (phdr[i].p_type == PT_DYNAMIC)
178 reloc_dyn(ehdr, phdr[i].p_offset);
179 }
180
181 /* rebase sections */
182 shdr = (void *)ehdr + ehdr->e_shoff;
183 for(i = 0; i < ehdr->e_shnum; i++) {
184 if (!(shdr[i].sh_flags & SHF_ALLOC))
185 continue;
186
187 shdr[i].sh_addr += VDSO_ADDR_ADJUST;
188
189 if (shdr[i].sh_type == SHT_SYMTAB ||
190 shdr[i].sh_type == SHT_DYNSYM)
191 reloc_symtab(ehdr, shdr[i].sh_offset,
192 shdr[i].sh_size);
193 }
194}
195
196static struct page *vdso32_pages[1];
197 75
198#ifdef CONFIG_X86_64 76#ifdef CONFIG_X86_64
199 77
@@ -212,12 +90,6 @@ void syscall32_cpu_init(void)
212 wrmsrl(MSR_CSTAR, ia32_cstar_target); 90 wrmsrl(MSR_CSTAR, ia32_cstar_target);
213} 91}
214 92
215#define compat_uses_vma 1
216
217static inline void map_compat_vdso(int map)
218{
219}
220
221#else /* CONFIG_X86_32 */ 93#else /* CONFIG_X86_32 */
222 94
223#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP)) 95#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP))
@@ -241,64 +113,36 @@ void enable_sep_cpu(void)
241 put_cpu(); 113 put_cpu();
242} 114}
243 115
244static struct vm_area_struct gate_vma;
245
246static int __init gate_vma_init(void)
247{
248 gate_vma.vm_mm = NULL;
249 gate_vma.vm_start = FIXADDR_USER_START;
250 gate_vma.vm_end = FIXADDR_USER_END;
251 gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
252 gate_vma.vm_page_prot = __P101;
253
254 return 0;
255}
256
257#define compat_uses_vma 0
258
259static void map_compat_vdso(int map)
260{
261 static int vdso_mapped;
262
263 if (map == vdso_mapped)
264 return;
265
266 vdso_mapped = map;
267
268 __set_fixmap(FIX_VDSO, page_to_pfn(vdso32_pages[0]) << PAGE_SHIFT,
269 map ? PAGE_READONLY_EXEC : PAGE_NONE);
270
271 /* flush stray tlbs */
272 flush_tlb_all();
273}
274
275#endif /* CONFIG_X86_64 */ 116#endif /* CONFIG_X86_64 */
276 117
277int __init sysenter_setup(void) 118int __init sysenter_setup(void)
278{ 119{
279 void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); 120 char *vdso32_start, *vdso32_end;
280 const void *vsyscall; 121 int npages, i;
281 size_t vsyscall_len;
282
283 vdso32_pages[0] = virt_to_page(syscall_page);
284
285#ifdef CONFIG_X86_32
286 gate_vma_init();
287#endif
288 122
123#ifdef CONFIG_COMPAT
289 if (vdso32_syscall()) { 124 if (vdso32_syscall()) {
290 vsyscall = &vdso32_syscall_start; 125 vdso32_start = vdso32_syscall_start;
291 vsyscall_len = &vdso32_syscall_end - &vdso32_syscall_start; 126 vdso32_end = vdso32_syscall_end;
292 } else if (vdso32_sysenter()){ 127 vdso32_pages = vdso32_syscall_pages;
293 vsyscall = &vdso32_sysenter_start; 128 } else
294 vsyscall_len = &vdso32_sysenter_end - &vdso32_sysenter_start; 129#endif
130 if (vdso32_sysenter()) {
131 vdso32_start = vdso32_sysenter_start;
132 vdso32_end = vdso32_sysenter_end;
133 vdso32_pages = vdso32_sysenter_pages;
295 } else { 134 } else {
296 vsyscall = &vdso32_int80_start; 135 vdso32_start = vdso32_int80_start;
297 vsyscall_len = &vdso32_int80_end - &vdso32_int80_start; 136 vdso32_end = vdso32_int80_end;
137 vdso32_pages = vdso32_int80_pages;
298 } 138 }
299 139
300 memcpy(syscall_page, vsyscall, vsyscall_len); 140 npages = ((vdso32_end - vdso32_start) + PAGE_SIZE - 1) / PAGE_SIZE;
301 relocate_vdso(syscall_page); 141 vdso32_size = npages << PAGE_SHIFT;
142 for (i = 0; i < npages; i++)
143 vdso32_pages[i] = virt_to_page(vdso32_start + i*PAGE_SIZE);
144
145 patch_vdso32(vdso32_start, vdso32_size);
302 146
303 return 0; 147 return 0;
304} 148}
@@ -309,48 +153,73 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
309 struct mm_struct *mm = current->mm; 153 struct mm_struct *mm = current->mm;
310 unsigned long addr; 154 unsigned long addr;
311 int ret = 0; 155 int ret = 0;
312 bool compat; 156 struct vm_area_struct *vma;
313 157
314#ifdef CONFIG_X86_X32_ABI 158#ifdef CONFIG_X86_X32_ABI
315 if (test_thread_flag(TIF_X32)) 159 if (test_thread_flag(TIF_X32))
316 return x32_setup_additional_pages(bprm, uses_interp); 160 return x32_setup_additional_pages(bprm, uses_interp);
317#endif 161#endif
318 162
319 if (vdso_enabled == VDSO_DISABLED) 163 if (vdso_enabled != 1) /* Other values all mean "disabled" */
320 return 0; 164 return 0;
321 165
322 down_write(&mm->mmap_sem); 166 down_write(&mm->mmap_sem);
323 167
324 /* Test compat mode once here, in case someone 168 addr = get_unmapped_area(NULL, 0, vdso32_size + VDSO_OFFSET(VDSO_PREV_PAGES), 0, 0);
325 changes it via sysctl */ 169 if (IS_ERR_VALUE(addr)) {
326 compat = (vdso_enabled == VDSO_COMPAT); 170 ret = addr;
171 goto up_fail;
172 }
173
174 addr += VDSO_OFFSET(VDSO_PREV_PAGES);
327 175
328 map_compat_vdso(compat); 176 current->mm->context.vdso = (void *)addr;
329 177
330 if (compat) 178 /*
331 addr = VDSO_HIGH_BASE; 179 * MAYWRITE to allow gdb to COW and set breakpoints
332 else { 180 */
333 addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); 181 ret = install_special_mapping(mm,
334 if (IS_ERR_VALUE(addr)) { 182 addr,
335 ret = addr; 183 vdso32_size,
336 goto up_fail; 184 VM_READ|VM_EXEC|
337 } 185 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
186 vdso32_pages);
187
188 if (ret)
189 goto up_fail;
190
191 vma = _install_special_mapping(mm,
192 addr - VDSO_OFFSET(VDSO_PREV_PAGES),
193 VDSO_OFFSET(VDSO_PREV_PAGES),
194 VM_READ,
195 NULL);
196
197 if (IS_ERR(vma)) {
198 ret = PTR_ERR(vma);
199 goto up_fail;
338 } 200 }
339 201
340 current->mm->context.vdso = (void *)addr; 202 ret = remap_pfn_range(vma,
203 addr - VDSO_OFFSET(VDSO_VVAR_PAGE),
204 __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
205 PAGE_SIZE,
206 PAGE_READONLY);
207
208 if (ret)
209 goto up_fail;
341 210
342 if (compat_uses_vma || !compat) { 211#ifdef CONFIG_HPET_TIMER
343 /* 212 if (hpet_address) {
344 * MAYWRITE to allow gdb to COW and set breakpoints 213 ret = io_remap_pfn_range(vma,
345 */ 214 addr - VDSO_OFFSET(VDSO_HPET_PAGE),
346 ret = install_special_mapping(mm, addr, PAGE_SIZE, 215 hpet_address >> PAGE_SHIFT,
347 VM_READ|VM_EXEC| 216 PAGE_SIZE,
348 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 217 pgprot_noncached(PAGE_READONLY));
349 vdso32_pages);
350 218
351 if (ret) 219 if (ret)
352 goto up_fail; 220 goto up_fail;
353 } 221 }
222#endif
354 223
355 current_thread_info()->sysenter_return = 224 current_thread_info()->sysenter_return =
356 VDSO32_SYMBOL(addr, SYSENTER_RETURN); 225 VDSO32_SYMBOL(addr, SYSENTER_RETURN);
@@ -411,20 +280,12 @@ const char *arch_vma_name(struct vm_area_struct *vma)
411 280
412struct vm_area_struct *get_gate_vma(struct mm_struct *mm) 281struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
413{ 282{
414 /*
415 * Check to see if the corresponding task was created in compat vdso
416 * mode.
417 */
418 if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE)
419 return &gate_vma;
420 return NULL; 283 return NULL;
421} 284}
422 285
423int in_gate_area(struct mm_struct *mm, unsigned long addr) 286int in_gate_area(struct mm_struct *mm, unsigned long addr)
424{ 287{
425 const struct vm_area_struct *vma = get_gate_vma(mm); 288 return 0;
426
427 return vma && addr >= vma->vm_start && addr < vma->vm_end;
428} 289}
429 290
430int in_gate_area_no_mm(unsigned long addr) 291int in_gate_area_no_mm(unsigned long addr)
diff --git a/arch/x86/vdso/vdso32.S b/arch/x86/vdso/vdso32.S
index 2ce5f82c333b..018bcd9f97b4 100644
--- a/arch/x86/vdso/vdso32.S
+++ b/arch/x86/vdso/vdso32.S
@@ -1,22 +1,9 @@
1#include <linux/init.h> 1#include <asm/vdso.h>
2 2
3__INITDATA 3DEFINE_VDSO_IMAGE(vdso32_int80, "arch/x86/vdso/vdso32-int80.so")
4 4
5 .globl vdso32_int80_start, vdso32_int80_end
6vdso32_int80_start:
7 .incbin "arch/x86/vdso/vdso32-int80.so"
8vdso32_int80_end:
9
10 .globl vdso32_syscall_start, vdso32_syscall_end
11vdso32_syscall_start:
12#ifdef CONFIG_COMPAT 5#ifdef CONFIG_COMPAT
13 .incbin "arch/x86/vdso/vdso32-syscall.so" 6DEFINE_VDSO_IMAGE(vdso32_syscall, "arch/x86/vdso/vdso32-syscall.so")
14#endif 7#endif
15vdso32_syscall_end:
16
17 .globl vdso32_sysenter_start, vdso32_sysenter_end
18vdso32_sysenter_start:
19 .incbin "arch/x86/vdso/vdso32-sysenter.so"
20vdso32_sysenter_end:
21 8
22__FINIT 9DEFINE_VDSO_IMAGE(vdso32_sysenter, "arch/x86/vdso/vdso32-sysenter.so")
diff --git a/arch/x86/vdso/vdso32/vclock_gettime.c b/arch/x86/vdso/vdso32/vclock_gettime.c
new file mode 100644
index 000000000000..175cc72c0f68
--- /dev/null
+++ b/arch/x86/vdso/vdso32/vclock_gettime.c
@@ -0,0 +1,30 @@
1#define BUILD_VDSO32
2
3#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
4#undef CONFIG_OPTIMIZE_INLINING
5#endif
6
7#undef CONFIG_X86_PPRO_FENCE
8
9#ifdef CONFIG_X86_64
10
11/*
12 * in case of a 32 bit VDSO for a 64 bit kernel fake a 32 bit kernel
13 * configuration
14 */
15#undef CONFIG_64BIT
16#undef CONFIG_X86_64
17#undef CONFIG_ILLEGAL_POINTER_VALUE
18#undef CONFIG_SPARSEMEM_VMEMMAP
19#undef CONFIG_NR_CPUS
20
21#define CONFIG_X86_32 1
22#define CONFIG_PAGE_OFFSET 0
23#define CONFIG_ILLEGAL_POINTER_VALUE 0
24#define CONFIG_NR_CPUS 1
25
26#define BUILD_VDSO32_64
27
28#endif
29
30#include "../vclock_gettime.c"
diff --git a/arch/x86/vdso/vdso32/vdso32.lds.S b/arch/x86/vdso/vdso32/vdso32.lds.S
index 976124bb5f92..aadb8b9994cd 100644
--- a/arch/x86/vdso/vdso32/vdso32.lds.S
+++ b/arch/x86/vdso/vdso32/vdso32.lds.S
@@ -8,7 +8,11 @@
8 * values visible using the asm-x86/vdso.h macros from the kernel proper. 8 * values visible using the asm-x86/vdso.h macros from the kernel proper.
9 */ 9 */
10 10
11#include <asm/page.h>
12
13#define BUILD_VDSO32
11#define VDSO_PRELINK 0 14#define VDSO_PRELINK 0
15
12#include "../vdso-layout.lds.S" 16#include "../vdso-layout.lds.S"
13 17
14/* The ELF entry point can be used to set the AT_SYSINFO value. */ 18/* The ELF entry point can be used to set the AT_SYSINFO value. */
@@ -19,6 +23,13 @@ ENTRY(__kernel_vsyscall);
19 */ 23 */
20VERSION 24VERSION
21{ 25{
26 LINUX_2.6 {
27 global:
28 __vdso_clock_gettime;
29 __vdso_gettimeofday;
30 __vdso_time;
31 };
32
22 LINUX_2.5 { 33 LINUX_2.5 {
23 global: 34 global:
24 __kernel_vsyscall; 35 __kernel_vsyscall;
@@ -31,7 +42,9 @@ VERSION
31/* 42/*
32 * Symbols we define here called VDSO* get their values into vdso32-syms.h. 43 * Symbols we define here called VDSO* get their values into vdso32-syms.h.
33 */ 44 */
34VDSO32_PRELINK = VDSO_PRELINK;
35VDSO32_vsyscall = __kernel_vsyscall; 45VDSO32_vsyscall = __kernel_vsyscall;
36VDSO32_sigreturn = __kernel_sigreturn; 46VDSO32_sigreturn = __kernel_sigreturn;
37VDSO32_rt_sigreturn = __kernel_rt_sigreturn; 47VDSO32_rt_sigreturn = __kernel_rt_sigreturn;
48VDSO32_clock_gettime = clock_gettime;
49VDSO32_gettimeofday = gettimeofday;
50VDSO32_time = time;
diff --git a/arch/x86/vdso/vdsox32.S b/arch/x86/vdso/vdsox32.S
index 295f1c7543d8..f4aa34e7f370 100644
--- a/arch/x86/vdso/vdsox32.S
+++ b/arch/x86/vdso/vdsox32.S
@@ -1,21 +1,3 @@
1#include <asm/page_types.h> 1#include <asm/vdso.h>
2#include <linux/linkage.h>
3 2
4__PAGE_ALIGNED_DATA 3DEFINE_VDSO_IMAGE(vdsox32, "arch/x86/vdso/vdsox32.so")
5
6 .globl vdsox32_start, vdsox32_end
7 .align PAGE_SIZE
8vdsox32_start:
9 .incbin "arch/x86/vdso/vdsox32.so"
10vdsox32_end:
11 .align PAGE_SIZE /* extra data here leaks to userspace. */
12
13.previous
14
15 .globl vdsox32_pages
16 .bss
17 .align 8
18 .type vdsox32_pages, @object
19vdsox32_pages:
20 .zero (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE * 8
21 .size vdsox32_pages, .-vdsox32_pages
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 431e87544411..1ad102613127 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -16,20 +16,22 @@
16#include <asm/vdso.h> 16#include <asm/vdso.h>
17#include <asm/page.h> 17#include <asm/page.h>
18 18
19#if defined(CONFIG_X86_64)
19unsigned int __read_mostly vdso_enabled = 1; 20unsigned int __read_mostly vdso_enabled = 1;
20 21
21extern char vdso_start[], vdso_end[]; 22DECLARE_VDSO_IMAGE(vdso);
22extern unsigned short vdso_sync_cpuid; 23extern unsigned short vdso_sync_cpuid;
23
24extern struct page *vdso_pages[];
25static unsigned vdso_size; 24static unsigned vdso_size;
26 25
27#ifdef CONFIG_X86_X32_ABI 26#ifdef CONFIG_X86_X32_ABI
28extern char vdsox32_start[], vdsox32_end[]; 27DECLARE_VDSO_IMAGE(vdsox32);
29extern struct page *vdsox32_pages[];
30static unsigned vdsox32_size; 28static unsigned vdsox32_size;
29#endif
30#endif
31 31
32static void __init patch_vdsox32(void *vdso, size_t len) 32#if defined(CONFIG_X86_32) || defined(CONFIG_X86_X32_ABI) || \
33 defined(CONFIG_COMPAT)
34void __init patch_vdso32(void *vdso, size_t len)
33{ 35{
34 Elf32_Ehdr *hdr = vdso; 36 Elf32_Ehdr *hdr = vdso;
35 Elf32_Shdr *sechdrs, *alt_sec = 0; 37 Elf32_Shdr *sechdrs, *alt_sec = 0;
@@ -52,7 +54,7 @@ static void __init patch_vdsox32(void *vdso, size_t len)
52 } 54 }
53 55
54 /* If we get here, it's probably a bug. */ 56 /* If we get here, it's probably a bug. */
55 pr_warning("patch_vdsox32: .altinstructions not found\n"); 57 pr_warning("patch_vdso32: .altinstructions not found\n");
56 return; /* nothing to patch */ 58 return; /* nothing to patch */
57 59
58found: 60found:
@@ -61,6 +63,7 @@ found:
61} 63}
62#endif 64#endif
63 65
66#if defined(CONFIG_X86_64)
64static void __init patch_vdso64(void *vdso, size_t len) 67static void __init patch_vdso64(void *vdso, size_t len)
65{ 68{
66 Elf64_Ehdr *hdr = vdso; 69 Elf64_Ehdr *hdr = vdso;
@@ -104,7 +107,7 @@ static int __init init_vdso(void)
104 vdso_pages[i] = virt_to_page(vdso_start + i*PAGE_SIZE); 107 vdso_pages[i] = virt_to_page(vdso_start + i*PAGE_SIZE);
105 108
106#ifdef CONFIG_X86_X32_ABI 109#ifdef CONFIG_X86_X32_ABI
107 patch_vdsox32(vdsox32_start, vdsox32_end - vdsox32_start); 110 patch_vdso32(vdsox32_start, vdsox32_end - vdsox32_start);
108 npages = (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE; 111 npages = (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE;
109 vdsox32_size = npages << PAGE_SHIFT; 112 vdsox32_size = npages << PAGE_SHIFT;
110 for (i = 0; i < npages; i++) 113 for (i = 0; i < npages; i++)
@@ -204,3 +207,4 @@ static __init int vdso_setup(char *s)
204 return 0; 207 return 0;
205} 208}
206__setup("vdso=", vdso_setup); 209__setup("vdso=", vdso_setup);
210#endif
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 2423ef04ffea..86e02eabb640 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2058,7 +2058,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
2058 case FIX_RO_IDT: 2058 case FIX_RO_IDT:
2059#ifdef CONFIG_X86_32 2059#ifdef CONFIG_X86_32
2060 case FIX_WP_TEST: 2060 case FIX_WP_TEST:
2061 case FIX_VDSO:
2062# ifdef CONFIG_HIGHMEM 2061# ifdef CONFIG_HIGHMEM
2063 case FIX_KMAP_BEGIN ... FIX_KMAP_END: 2062 case FIX_KMAP_BEGIN ... FIX_KMAP_END:
2064# endif 2063# endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a0df4295e171..2eec61fe75c9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1756,6 +1756,9 @@ extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
1756extern struct file *get_mm_exe_file(struct mm_struct *mm); 1756extern struct file *get_mm_exe_file(struct mm_struct *mm);
1757 1757
1758extern int may_expand_vm(struct mm_struct *mm, unsigned long npages); 1758extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
1759extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
1760 unsigned long addr, unsigned long len,
1761 unsigned long flags, struct page **pages);
1759extern int install_special_mapping(struct mm_struct *mm, 1762extern int install_special_mapping(struct mm_struct *mm,
1760 unsigned long addr, unsigned long len, 1763 unsigned long addr, unsigned long len,
1761 unsigned long flags, struct page **pages); 1764 unsigned long flags, struct page **pages);
diff --git a/mm/mmap.c b/mm/mmap.c
index 20ff0c33274c..81ba54ff96c7 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2918,7 +2918,7 @@ static const struct vm_operations_struct special_mapping_vmops = {
2918 * The array pointer and the pages it points to are assumed to stay alive 2918 * The array pointer and the pages it points to are assumed to stay alive
2919 * for as long as this mapping might exist. 2919 * for as long as this mapping might exist.
2920 */ 2920 */
2921int install_special_mapping(struct mm_struct *mm, 2921struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
2922 unsigned long addr, unsigned long len, 2922 unsigned long addr, unsigned long len,
2923 unsigned long vm_flags, struct page **pages) 2923 unsigned long vm_flags, struct page **pages)
2924{ 2924{
@@ -2927,7 +2927,7 @@ int install_special_mapping(struct mm_struct *mm,
2927 2927
2928 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); 2928 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
2929 if (unlikely(vma == NULL)) 2929 if (unlikely(vma == NULL))
2930 return -ENOMEM; 2930 return ERR_PTR(-ENOMEM);
2931 2931
2932 INIT_LIST_HEAD(&vma->anon_vma_chain); 2932 INIT_LIST_HEAD(&vma->anon_vma_chain);
2933 vma->vm_mm = mm; 2933 vma->vm_mm = mm;
@@ -2948,11 +2948,23 @@ int install_special_mapping(struct mm_struct *mm,
2948 2948
2949 perf_event_mmap(vma); 2949 perf_event_mmap(vma);
2950 2950
2951 return 0; 2951 return vma;
2952 2952
2953out: 2953out:
2954 kmem_cache_free(vm_area_cachep, vma); 2954 kmem_cache_free(vm_area_cachep, vma);
2955 return ret; 2955 return ERR_PTR(ret);
2956}
2957
2958int install_special_mapping(struct mm_struct *mm,
2959 unsigned long addr, unsigned long len,
2960 unsigned long vm_flags, struct page **pages)
2961{
2962 struct vm_area_struct *vma = _install_special_mapping(mm,
2963 addr, len, vm_flags, pages);
2964
2965 if (IS_ERR(vma))
2966 return PTR_ERR(vma);
2967 return 0;
2956} 2968}
2957 2969
2958static DEFINE_MUTEX(mm_all_locks_mutex); 2970static DEFINE_MUTEX(mm_all_locks_mutex);