aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org>, 2011-07-22 20:05:15 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org>, 2011-07-22 20:05:15 -0400
commit: 8e204874db000928e37199c2db82b7eb8966cc3c (patch)
tree: eae66035cb761c3c5a79e98b92280b5156bc01ef /arch/x86
parent: 3e0b8df79ddb8955d2cce5e858972a9cfe763384 (diff)
parent: aafade242ff24fac3aabf61c7861dfa44a3c2445 (diff)
Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86-64, vdso: Do not allocate memory for the vDSO
  clocksource: Change __ARCH_HAS_CLOCKSOURCE_DATA to a CONFIG option
  x86, vdso: Drop now wrong comment
  Document the vDSO and add a reference parser
  ia64: Replace clocksource.fsys_mmio with generic arch data
  x86-64: Move vread_tsc and vread_hpet into the vDSO
  clocksource: Replace vread with generic arch data
  x86-64: Add --no-undefined to vDSO build
  x86-64: Allow alternative patching in the vDSO
  x86: Make alternative instruction pointers relative
  x86-64: Improve vsyscall emulation CS and RIP handling
  x86-64: Emulate legacy vsyscalls
  x86-64: Fill unused parts of the vsyscall page with 0xcc
  x86-64: Remove vsyscall number 3 (venosys)
  x86-64: Map the HPET NX
  x86-64: Remove kernel.vsyscall64 sysctl
  x86-64: Give vvars their own page
  x86-64: Document some of entry_64.S
  x86-64: Fix alignment of jiffies variable
Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/Kconfig | 4
-rw-r--r-- arch/x86/include/asm/alternative-asm.h | 4
-rw-r--r-- arch/x86/include/asm/alternative.h | 8
-rw-r--r-- arch/x86/include/asm/clocksource.h | 18
-rw-r--r-- arch/x86/include/asm/cpufeature.h | 8
-rw-r--r-- arch/x86/include/asm/fixmap.h | 1
-rw-r--r-- arch/x86/include/asm/irq_vectors.h | 6
-rw-r--r-- arch/x86/include/asm/pgtable_types.h | 6
-rw-r--r-- arch/x86/include/asm/traps.h | 4
-rw-r--r-- arch/x86/include/asm/tsc.h | 4
-rw-r--r-- arch/x86/include/asm/vgtod.h | 3
-rw-r--r-- arch/x86/include/asm/vsyscall.h | 4
-rw-r--r-- arch/x86/include/asm/vvar.h | 24
-rw-r--r-- arch/x86/kernel/Makefile | 8
-rw-r--r-- arch/x86/kernel/alternative.c | 23
-rw-r--r-- arch/x86/kernel/entry_64.S | 4
-rw-r--r-- arch/x86/kernel/hpet.c | 11
-rw-r--r-- arch/x86/kernel/traps.c | 6
-rw-r--r-- arch/x86/kernel/tsc.c | 2
-rw-r--r-- arch/x86/kernel/vmlinux.lds.S | 49
-rw-r--r-- arch/x86/kernel/vread_tsc_64.c | 36
-rw-r--r-- arch/x86/kernel/vsyscall_64.c | 310
-rw-r--r-- arch/x86/kernel/vsyscall_emu_64.S | 27
-rw-r--r-- arch/x86/lib/copy_page_64.S | 9
-rw-r--r-- arch/x86/lib/memmove_64.S | 11
-rw-r--r-- arch/x86/vdso/Makefile | 1
-rw-r--r-- arch/x86/vdso/vclock_gettime.c | 103
-rw-r--r-- arch/x86/vdso/vdso.S | 15
-rw-r--r-- arch/x86/vdso/vma.c | 58
29 files changed, 400 insertions, 367 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fc76e4209003..5f60ea190d5b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -95,6 +95,10 @@ config CLOCKSOURCE_WATCHDOG
95config GENERIC_CLOCKEVENTS 95config GENERIC_CLOCKEVENTS
96 def_bool y 96 def_bool y
97 97
98config ARCH_CLOCKSOURCE_DATA
99 def_bool y
100 depends on X86_64
101
98config GENERIC_CLOCKEVENTS_BROADCAST 102config GENERIC_CLOCKEVENTS_BROADCAST
99 def_bool y 103 def_bool y
100 depends on X86_64 || (X86_32 && X86_LOCAL_APIC) 104 depends on X86_64 || (X86_32 && X86_LOCAL_APIC)
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 94d420b360d1..4554cc6fb96a 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -17,8 +17,8 @@
17 17
18.macro altinstruction_entry orig alt feature orig_len alt_len 18.macro altinstruction_entry orig alt feature orig_len alt_len
19 .align 8 19 .align 8
20 .quad \orig 20 .long \orig - .
21 .quad \alt 21 .long \alt - .
22 .word \feature 22 .word \feature
23 .byte \orig_len 23 .byte \orig_len
24 .byte \alt_len 24 .byte \alt_len
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index bf535f947e8c..23fb6d79f209 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -43,8 +43,8 @@
43#endif 43#endif
44 44
45struct alt_instr { 45struct alt_instr {
46 u8 *instr; /* original instruction */ 46 s32 instr_offset; /* original instruction */
47 u8 *replacement; 47 s32 repl_offset; /* offset to replacement instruction */
48 u16 cpuid; /* cpuid bit set for replacement */ 48 u16 cpuid; /* cpuid bit set for replacement */
49 u8 instrlen; /* length of original instruction */ 49 u8 instrlen; /* length of original instruction */
50 u8 replacementlen; /* length of new instruction, <= instrlen */ 50 u8 replacementlen; /* length of new instruction, <= instrlen */
@@ -84,8 +84,8 @@ static inline int alternatives_text_reserved(void *start, void *end)
84 "661:\n\t" oldinstr "\n662:\n" \ 84 "661:\n\t" oldinstr "\n662:\n" \
85 ".section .altinstructions,\"a\"\n" \ 85 ".section .altinstructions,\"a\"\n" \
86 _ASM_ALIGN "\n" \ 86 _ASM_ALIGN "\n" \
87 _ASM_PTR "661b\n" /* label */ \ 87 " .long 661b - .\n" /* label */ \
88 _ASM_PTR "663f\n" /* new instruction */ \ 88 " .long 663f - .\n" /* new instruction */ \
89 " .word " __stringify(feature) "\n" /* feature bit */ \ 89 " .word " __stringify(feature) "\n" /* feature bit */ \
90 " .byte 662b-661b\n" /* sourcelen */ \ 90 " .byte 662b-661b\n" /* sourcelen */ \
91 " .byte 664f-663f\n" /* replacementlen */ \ 91 " .byte 664f-663f\n" /* replacementlen */ \
diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h
new file mode 100644
index 000000000000..0bdbbb3b9ce7
--- /dev/null
+++ b/arch/x86/include/asm/clocksource.h
@@ -0,0 +1,18 @@
1/* x86-specific clocksource additions */
2
3#ifndef _ASM_X86_CLOCKSOURCE_H
4#define _ASM_X86_CLOCKSOURCE_H
5
6#ifdef CONFIG_X86_64
7
8#define VCLOCK_NONE 0 /* No vDSO clock available. */
9#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
10#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
11
12struct arch_clocksource_data {
13 int vclock_mode;
14};
15
16#endif /* CONFIG_X86_64 */
17
18#endif /* _ASM_X86_CLOCKSOURCE_H */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 71cc3800712c..9929b35929ff 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -331,8 +331,8 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
331 "2:\n" 331 "2:\n"
332 ".section .altinstructions,\"a\"\n" 332 ".section .altinstructions,\"a\"\n"
333 _ASM_ALIGN "\n" 333 _ASM_ALIGN "\n"
334 _ASM_PTR "1b\n" 334 " .long 1b - .\n"
335 _ASM_PTR "0\n" /* no replacement */ 335 " .long 0\n" /* no replacement */
336 " .word %P0\n" /* feature bit */ 336 " .word %P0\n" /* feature bit */
337 " .byte 2b - 1b\n" /* source len */ 337 " .byte 2b - 1b\n" /* source len */
338 " .byte 0\n" /* replacement len */ 338 " .byte 0\n" /* replacement len */
@@ -349,8 +349,8 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
349 "2:\n" 349 "2:\n"
350 ".section .altinstructions,\"a\"\n" 350 ".section .altinstructions,\"a\"\n"
351 _ASM_ALIGN "\n" 351 _ASM_ALIGN "\n"
352 _ASM_PTR "1b\n" 352 " .long 1b - .\n"
353 _ASM_PTR "3f\n" 353 " .long 3f - .\n"
354 " .word %P1\n" /* feature bit */ 354 " .word %P1\n" /* feature bit */
355 " .byte 2b - 1b\n" /* source len */ 355 " .byte 2b - 1b\n" /* source len */
356 " .byte 4f - 3f\n" /* replacement len */ 356 " .byte 4f - 3f\n" /* replacement len */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 4729b2b63117..460c74e4852c 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -78,6 +78,7 @@ enum fixed_addresses {
78 VSYSCALL_LAST_PAGE, 78 VSYSCALL_LAST_PAGE,
79 VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE 79 VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
80 + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1, 80 + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
81 VVAR_PAGE,
81 VSYSCALL_HPET, 82 VSYSCALL_HPET,
82#endif 83#endif
83 FIX_DBGP_BASE, 84 FIX_DBGP_BASE,
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 6665026ea3ea..f9a320984a10 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -17,7 +17,8 @@
17 * Vectors 0 ... 31 : system traps and exceptions - hardcoded events 17 * Vectors 0 ... 31 : system traps and exceptions - hardcoded events
18 * Vectors 32 ... 127 : device interrupts 18 * Vectors 32 ... 127 : device interrupts
19 * Vector 128 : legacy int80 syscall interface 19 * Vector 128 : legacy int80 syscall interface
20 * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 : device interrupts 20 * Vector 204 : legacy x86_64 vsyscall emulation
21 * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts
21 * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts 22 * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
22 * 23 *
23 * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. 24 * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
@@ -50,6 +51,9 @@
50#ifdef CONFIG_X86_32 51#ifdef CONFIG_X86_32
51# define SYSCALL_VECTOR 0x80 52# define SYSCALL_VECTOR 0x80
52#endif 53#endif
54#ifdef CONFIG_X86_64
55# define VSYSCALL_EMU_VECTOR 0xcc
56#endif
53 57
54/* 58/*
55 * Vectors 0x30-0x3f are used for ISA interrupts. 59 * Vectors 0x30-0x3f are used for ISA interrupts.
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index d56187c6b838..013286a10c2c 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -107,7 +107,8 @@
107#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT) 107#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT)
108#define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD) 108#define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD)
109#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) 109#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER)
110#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT) 110#define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER)
111#define __PAGE_KERNEL_VVAR_NOCACHE (__PAGE_KERNEL_VVAR | _PAGE_PCD | _PAGE_PWT)
111#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) 112#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
112#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE) 113#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE)
113#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) 114#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
@@ -129,7 +130,8 @@
129#define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE) 130#define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE)
130#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) 131#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC)
131#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL) 132#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL)
132#define PAGE_KERNEL_VSYSCALL_NOCACHE __pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE) 133#define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR)
134#define PAGE_KERNEL_VVAR_NOCACHE __pgprot(__PAGE_KERNEL_VVAR_NOCACHE)
133 135
134#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) 136#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO)
135#define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE) 137#define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE)
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 0310da67307f..2bae0a513b40 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -1,6 +1,8 @@
1#ifndef _ASM_X86_TRAPS_H 1#ifndef _ASM_X86_TRAPS_H
2#define _ASM_X86_TRAPS_H 2#define _ASM_X86_TRAPS_H
3 3
4#include <linux/kprobes.h>
5
4#include <asm/debugreg.h> 6#include <asm/debugreg.h>
5#include <asm/siginfo.h> /* TRAP_TRACE, ... */ 7#include <asm/siginfo.h> /* TRAP_TRACE, ... */
6 8
@@ -38,6 +40,7 @@ asmlinkage void alignment_check(void);
38asmlinkage void machine_check(void); 40asmlinkage void machine_check(void);
39#endif /* CONFIG_X86_MCE */ 41#endif /* CONFIG_X86_MCE */
40asmlinkage void simd_coprocessor_error(void); 42asmlinkage void simd_coprocessor_error(void);
43asmlinkage void emulate_vsyscall(void);
41 44
42dotraplinkage void do_divide_error(struct pt_regs *, long); 45dotraplinkage void do_divide_error(struct pt_regs *, long);
43dotraplinkage void do_debug(struct pt_regs *, long); 46dotraplinkage void do_debug(struct pt_regs *, long);
@@ -64,6 +67,7 @@ dotraplinkage void do_alignment_check(struct pt_regs *, long);
64dotraplinkage void do_machine_check(struct pt_regs *, long); 67dotraplinkage void do_machine_check(struct pt_regs *, long);
65#endif 68#endif
66dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long); 69dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long);
70dotraplinkage void do_emulate_vsyscall(struct pt_regs *, long);
67#ifdef CONFIG_X86_32 71#ifdef CONFIG_X86_32
68dotraplinkage void do_iret_error(struct pt_regs *, long); 72dotraplinkage void do_iret_error(struct pt_regs *, long);
69#endif 73#endif
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 9db5583b6d38..83e2efd181e2 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -51,10 +51,6 @@ extern int unsynchronized_tsc(void);
51extern int check_tsc_unstable(void); 51extern int check_tsc_unstable(void);
52extern unsigned long native_calibrate_tsc(void); 52extern unsigned long native_calibrate_tsc(void);
53 53
54#ifdef CONFIG_X86_64
55extern cycles_t vread_tsc(void);
56#endif
57
58/* 54/*
59 * Boot-time check whether the TSCs are synchronized across 55 * Boot-time check whether the TSCs are synchronized across
60 * all CPUs/cores: 56 * all CPUs/cores:
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 646b4c1ca695..815285bcaceb 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -11,10 +11,9 @@ struct vsyscall_gtod_data {
11 time_t wall_time_sec; 11 time_t wall_time_sec;
12 u32 wall_time_nsec; 12 u32 wall_time_nsec;
13 13
14 int sysctl_enabled;
15 struct timezone sys_tz; 14 struct timezone sys_tz;
16 struct { /* extract of a clocksource struct */ 15 struct { /* extract of a clocksource struct */
17 cycle_t (*vread)(void); 16 int vclock_mode;
18 cycle_t cycle_last; 17 cycle_t cycle_last;
19 cycle_t mask; 18 cycle_t mask;
20 u32 mult; 19 u32 mult;
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index d55597351f6a..60107072c28b 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -16,10 +16,6 @@ enum vsyscall_num {
16#ifdef __KERNEL__ 16#ifdef __KERNEL__
17#include <linux/seqlock.h> 17#include <linux/seqlock.h>
18 18
19/* Definitions for CONFIG_GENERIC_TIME definitions */
20#define __vsyscall_fn \
21 __attribute__ ((unused, __section__(".vsyscall_fn"))) notrace
22
23#define VGETCPU_RDTSCP 1 19#define VGETCPU_RDTSCP 1
24#define VGETCPU_LSL 2 20#define VGETCPU_LSL 2
25 21
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
index 341b3559452b..de656ac2af41 100644
--- a/arch/x86/include/asm/vvar.h
+++ b/arch/x86/include/asm/vvar.h
@@ -10,15 +10,14 @@
10 * In normal kernel code, they are used like any other variable. 10 * In normal kernel code, they are used like any other variable.
11 * In user code, they are accessed through the VVAR macro. 11 * In user code, they are accessed through the VVAR macro.
12 * 12 *
13 * Each of these variables lives in the vsyscall page, and each 13 * These variables live in a page of kernel data that has an extra RO
14 * one needs a unique offset within the little piece of the page 14 * mapping for userspace. Each variable needs a unique offset within
15 * reserved for vvars. Specify that offset in DECLARE_VVAR. 15 * that page; specify that offset with the DECLARE_VVAR macro. (If
16 * (There are 896 bytes available. If you mess up, the linker will 16 * you mess up, the linker will catch it.)
17 * catch it.)
18 */ 17 */
19 18
20/* Offset of vars within vsyscall page */ 19/* Base address of vvars. This is not ABI. */
21#define VSYSCALL_VARS_OFFSET (3072 + 128) 20#define VVAR_ADDRESS (-10*1024*1024 - 4096)
22 21
23#if defined(__VVAR_KERNEL_LDS) 22#if defined(__VVAR_KERNEL_LDS)
24 23
@@ -26,17 +25,17 @@
26 * right place. 25 * right place.
27 */ 26 */
28#define DECLARE_VVAR(offset, type, name) \ 27#define DECLARE_VVAR(offset, type, name) \
29 EMIT_VVAR(name, VSYSCALL_VARS_OFFSET + offset) 28 EMIT_VVAR(name, offset)
30 29
31#else 30#else
32 31
33#define DECLARE_VVAR(offset, type, name) \ 32#define DECLARE_VVAR(offset, type, name) \
34 static type const * const vvaraddr_ ## name = \ 33 static type const * const vvaraddr_ ## name = \
35 (void *)(VSYSCALL_START + VSYSCALL_VARS_OFFSET + (offset)); 34 (void *)(VVAR_ADDRESS + (offset));
36 35
37#define DEFINE_VVAR(type, name) \ 36#define DEFINE_VVAR(type, name) \
38 type __vvar_ ## name \ 37 type name \
39 __attribute__((section(".vsyscall_var_" #name), aligned(16))) 38 __attribute__((section(".vvar_" #name), aligned(16)))
40 39
41#define VVAR(name) (*vvaraddr_ ## name) 40#define VVAR(name) (*vvaraddr_ ## name)
42 41
@@ -45,8 +44,7 @@
45/* DECLARE_VVAR(offset, type, name) */ 44/* DECLARE_VVAR(offset, type, name) */
46 45
47DECLARE_VVAR(0, volatile unsigned long, jiffies) 46DECLARE_VVAR(0, volatile unsigned long, jiffies)
48DECLARE_VVAR(8, int, vgetcpu_mode) 47DECLARE_VVAR(16, int, vgetcpu_mode)
49DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data) 48DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data)
50 49
51#undef DECLARE_VVAR 50#undef DECLARE_VVAR
52#undef VSYSCALL_VARS_OFFSET
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 11817ff85399..04105574c8e9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -24,17 +24,12 @@ endif
24nostackp := $(call cc-option, -fno-stack-protector) 24nostackp := $(call cc-option, -fno-stack-protector)
25CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) 25CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
26CFLAGS_hpet.o := $(nostackp) 26CFLAGS_hpet.o := $(nostackp)
27CFLAGS_vread_tsc_64.o := $(nostackp)
28CFLAGS_paravirt.o := $(nostackp) 27CFLAGS_paravirt.o := $(nostackp)
29GCOV_PROFILE_vsyscall_64.o := n 28GCOV_PROFILE_vsyscall_64.o := n
30GCOV_PROFILE_hpet.o := n 29GCOV_PROFILE_hpet.o := n
31GCOV_PROFILE_tsc.o := n 30GCOV_PROFILE_tsc.o := n
32GCOV_PROFILE_vread_tsc_64.o := n
33GCOV_PROFILE_paravirt.o := n 31GCOV_PROFILE_paravirt.o := n
34 32
35# vread_tsc_64 is hot and should be fully optimized:
36CFLAGS_REMOVE_vread_tsc_64.o = -pg -fno-optimize-sibling-calls
37
38obj-y := process_$(BITS).o signal.o entry_$(BITS).o 33obj-y := process_$(BITS).o signal.o entry_$(BITS).o
39obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o 34obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
40obj-y += time.o ioport.o ldt.o dumpstack.o 35obj-y += time.o ioport.o ldt.o dumpstack.o
@@ -43,7 +38,8 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o
43obj-y += probe_roms.o 38obj-y += probe_roms.o
44obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o 39obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
45obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o 40obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
46obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o vread_tsc_64.o 41obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
42obj-$(CONFIG_X86_64) += vsyscall_emu_64.o
47obj-y += bootflag.o e820.o 43obj-y += bootflag.o e820.o
48obj-y += pci-dma.o quirks.o topology.o kdebugfs.o 44obj-y += pci-dma.o quirks.o topology.o kdebugfs.o
49obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o 45obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index a81f2d52f869..c63822816249 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -14,7 +14,6 @@
14#include <asm/pgtable.h> 14#include <asm/pgtable.h>
15#include <asm/mce.h> 15#include <asm/mce.h>
16#include <asm/nmi.h> 16#include <asm/nmi.h>
17#include <asm/vsyscall.h>
18#include <asm/cacheflush.h> 17#include <asm/cacheflush.h>
19#include <asm/tlbflush.h> 18#include <asm/tlbflush.h>
20#include <asm/io.h> 19#include <asm/io.h>
@@ -250,7 +249,6 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
250 249
251extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; 250extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
252extern s32 __smp_locks[], __smp_locks_end[]; 251extern s32 __smp_locks[], __smp_locks_end[];
253extern char __vsyscall_0;
254void *text_poke_early(void *addr, const void *opcode, size_t len); 252void *text_poke_early(void *addr, const void *opcode, size_t len);
255 253
256/* Replace instructions with better alternatives for this CPU type. 254/* Replace instructions with better alternatives for this CPU type.
@@ -263,6 +261,7 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
263 struct alt_instr *end) 261 struct alt_instr *end)
264{ 262{
265 struct alt_instr *a; 263 struct alt_instr *a;
264 u8 *instr, *replacement;
266 u8 insnbuf[MAX_PATCH_LEN]; 265 u8 insnbuf[MAX_PATCH_LEN];
267 266
268 DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); 267 DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
@@ -276,25 +275,23 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
276 * order. 275 * order.
277 */ 276 */
278 for (a = start; a < end; a++) { 277 for (a = start; a < end; a++) {
279 u8 *instr = a->instr; 278 instr = (u8 *)&a->instr_offset + a->instr_offset;
279 replacement = (u8 *)&a->repl_offset + a->repl_offset;
280 BUG_ON(a->replacementlen > a->instrlen); 280 BUG_ON(a->replacementlen > a->instrlen);
281 BUG_ON(a->instrlen > sizeof(insnbuf)); 281 BUG_ON(a->instrlen > sizeof(insnbuf));
282 BUG_ON(a->cpuid >= NCAPINTS*32); 282 BUG_ON(a->cpuid >= NCAPINTS*32);
283 if (!boot_cpu_has(a->cpuid)) 283 if (!boot_cpu_has(a->cpuid))
284 continue; 284 continue;
285#ifdef CONFIG_X86_64 285
286 /* vsyscall code is not mapped yet. resolve it manually. */ 286 memcpy(insnbuf, replacement, a->replacementlen);
287 if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) { 287
288 instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0)); 288 /* 0xe8 is a relative jump; fix the offset. */
289 DPRINTK("%s: vsyscall fixup: %p => %p\n",
290 __func__, a->instr, instr);
291 }
292#endif
293 memcpy(insnbuf, a->replacement, a->replacementlen);
294 if (*insnbuf == 0xe8 && a->replacementlen == 5) 289 if (*insnbuf == 0xe8 && a->replacementlen == 5)
295 *(s32 *)(insnbuf + 1) += a->replacement - a->instr; 290 *(s32 *)(insnbuf + 1) += replacement - instr;
291
296 add_nops(insnbuf + a->replacementlen, 292 add_nops(insnbuf + a->replacementlen,
297 a->instrlen - a->replacementlen); 293 a->instrlen - a->replacementlen);
294
298 text_poke_early(instr, insnbuf, a->instrlen); 295 text_poke_early(instr, insnbuf, a->instrlen);
299 } 296 }
300} 297}
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 37e895a1c74d..e13329d800c8 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -9,6 +9,8 @@
9/* 9/*
10 * entry.S contains the system-call and fault low-level handling routines. 10 * entry.S contains the system-call and fault low-level handling routines.
11 * 11 *
12 * Some of this is documented in Documentation/x86/entry_64.txt
13 *
12 * NOTE: This code handles signal-recognition, which happens every time 14 * NOTE: This code handles signal-recognition, which happens every time
13 * after an interrupt and after each system call. 15 * after an interrupt and after each system call.
14 * 16 *
@@ -1109,6 +1111,8 @@ zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
1109zeroentry coprocessor_error do_coprocessor_error 1111zeroentry coprocessor_error do_coprocessor_error
1110errorentry alignment_check do_alignment_check 1112errorentry alignment_check do_alignment_check
1111zeroentry simd_coprocessor_error do_simd_coprocessor_error 1113zeroentry simd_coprocessor_error do_simd_coprocessor_error
1114zeroentry emulate_vsyscall do_emulate_vsyscall
1115
1112 1116
1113 /* Reload gs selector with exception handling */ 1117 /* Reload gs selector with exception handling */
1114 /* edi: new selector */ 1118 /* edi: new selector */
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 0f4b0651cd3f..4aecc54236a9 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -72,7 +72,7 @@ static inline void hpet_set_mapping(void)
72{ 72{
73 hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); 73 hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
74#ifdef CONFIG_X86_64 74#ifdef CONFIG_X86_64
75 __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE); 75 __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VVAR_NOCACHE);
76#endif 76#endif
77} 77}
78 78
@@ -739,13 +739,6 @@ static cycle_t read_hpet(struct clocksource *cs)
739 return (cycle_t)hpet_readl(HPET_COUNTER); 739 return (cycle_t)hpet_readl(HPET_COUNTER);
740} 740}
741 741
742#ifdef CONFIG_X86_64
743static cycle_t __vsyscall_fn vread_hpet(void)
744{
745 return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0);
746}
747#endif
748
749static struct clocksource clocksource_hpet = { 742static struct clocksource clocksource_hpet = {
750 .name = "hpet", 743 .name = "hpet",
751 .rating = 250, 744 .rating = 250,
@@ -754,7 +747,7 @@ static struct clocksource clocksource_hpet = {
754 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 747 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
755 .resume = hpet_resume_counter, 748 .resume = hpet_resume_counter,
756#ifdef CONFIG_X86_64 749#ifdef CONFIG_X86_64
757 .vread = vread_hpet, 750 .archdata = { .vclock_mode = VCLOCK_HPET },
758#endif 751#endif
759}; 752};
760 753
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b9b67166f9de..fbc097a085ca 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -872,6 +872,12 @@ void __init trap_init(void)
872 set_bit(SYSCALL_VECTOR, used_vectors); 872 set_bit(SYSCALL_VECTOR, used_vectors);
873#endif 873#endif
874 874
875#ifdef CONFIG_X86_64
876 BUG_ON(test_bit(VSYSCALL_EMU_VECTOR, used_vectors));
877 set_system_intr_gate(VSYSCALL_EMU_VECTOR, &emulate_vsyscall);
878 set_bit(VSYSCALL_EMU_VECTOR, used_vectors);
879#endif
880
875 /* 881 /*
876 * Should be a barrier for any external CPU state: 882 * Should be a barrier for any external CPU state:
877 */ 883 */
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 6cc6922262af..56c633a5db72 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -777,7 +777,7 @@ static struct clocksource clocksource_tsc = {
777 .flags = CLOCK_SOURCE_IS_CONTINUOUS | 777 .flags = CLOCK_SOURCE_IS_CONTINUOUS |
778 CLOCK_SOURCE_MUST_VERIFY, 778 CLOCK_SOURCE_MUST_VERIFY,
779#ifdef CONFIG_X86_64 779#ifdef CONFIG_X86_64
780 .vread = vread_tsc, 780 .archdata = { .vclock_mode = VCLOCK_TSC },
781#endif 781#endif
782}; 782};
783 783
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 89aed99aafce..4aa9c54a9b76 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -161,50 +161,47 @@ SECTIONS
161 161
162#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0) 162#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
163#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) 163#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
164#define EMIT_VVAR(x, offset) .vsyscall_var_ ## x \
165 ADDR(.vsyscall_0) + offset \
166 : AT(VLOAD(.vsyscall_var_ ## x)) { \
167 *(.vsyscall_var_ ## x) \
168 } \
169 x = VVIRT(.vsyscall_var_ ## x);
170 164
171 . = ALIGN(4096); 165 . = ALIGN(4096);
172 __vsyscall_0 = .; 166 __vsyscall_0 = .;
173 167
174 . = VSYSCALL_ADDR; 168 . = VSYSCALL_ADDR;
175 .vsyscall_0 : AT(VLOAD(.vsyscall_0)) { 169 .vsyscall : AT(VLOAD(.vsyscall)) {
176 *(.vsyscall_0) 170 *(.vsyscall_0)
177 } :user
178 171
179 . = ALIGN(L1_CACHE_BYTES); 172 . = 1024;
180 .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) {
181 *(.vsyscall_fn)
182 }
183
184 .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) {
185 *(.vsyscall_1) 173 *(.vsyscall_1)
186 }
187 .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) {
188 *(.vsyscall_2)
189 }
190 174
191 .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) { 175 . = 2048;
192 *(.vsyscall_3) 176 *(.vsyscall_2)
193 }
194
195#define __VVAR_KERNEL_LDS
196#include <asm/vvar.h>
197#undef __VVAR_KERNEL_LDS
198 177
199 . = __vsyscall_0 + PAGE_SIZE; 178 . = 4096; /* Pad the whole page. */
179 } :user =0xcc
180 . = ALIGN(__vsyscall_0 + PAGE_SIZE, PAGE_SIZE);
200 181
201#undef VSYSCALL_ADDR 182#undef VSYSCALL_ADDR
202#undef VLOAD_OFFSET 183#undef VLOAD_OFFSET
203#undef VLOAD 184#undef VLOAD
204#undef VVIRT_OFFSET 185#undef VVIRT_OFFSET
205#undef VVIRT 186#undef VVIRT
187
188 __vvar_page = .;
189
190 .vvar : AT(ADDR(.vvar) - LOAD_OFFSET) {
191
192 /* Place all vvars at the offsets in asm/vvar.h. */
193#define EMIT_VVAR(name, offset) \
194 . = offset; \
195 *(.vvar_ ## name)
196#define __VVAR_KERNEL_LDS
197#include <asm/vvar.h>
198#undef __VVAR_KERNEL_LDS
206#undef EMIT_VVAR 199#undef EMIT_VVAR
207 200
201 } :data
202
203 . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE);
204
208#endif /* CONFIG_X86_64 */ 205#endif /* CONFIG_X86_64 */
209 206
210 /* Init code and data - will be freed after init */ 207 /* Init code and data - will be freed after init */
diff --git a/arch/x86/kernel/vread_tsc_64.c b/arch/x86/kernel/vread_tsc_64.c
deleted file mode 100644
index a81aa9e9894c..000000000000
--- a/arch/x86/kernel/vread_tsc_64.c
+++ /dev/null
@@ -1,36 +0,0 @@
1/* This code runs in userspace. */
2
3#define DISABLE_BRANCH_PROFILING
4#include <asm/vgtod.h>
5
6notrace cycle_t __vsyscall_fn vread_tsc(void)
7{
8 cycle_t ret;
9 u64 last;
10
11 /*
12 * Empirically, a fence (of type that depends on the CPU)
13 * before rdtsc is enough to ensure that rdtsc is ordered
14 * with respect to loads. The various CPU manuals are unclear
15 * as to whether rdtsc can be reordered with later loads,
16 * but no one has ever seen it happen.
17 */
18 rdtsc_barrier();
19 ret = (cycle_t)vget_cycles();
20
21 last = VVAR(vsyscall_gtod_data).clock.cycle_last;
22
23 if (likely(ret >= last))
24 return ret;
25
26 /*
27 * GCC likes to generate cmov here, but this branch is extremely
28 * predictable (it's just a funciton of time and the likely is
29 * very likely) and there's a data dependence, so force GCC
30 * to generate a branch instead. I don't barrier() because
31 * we don't actually need a barrier, and if this function
32 * ever gets inlined it will generate worse code.
33 */
34 asm volatile ("");
35 return last;
36}
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 3e682184d76c..dda7dff9cef7 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -2,6 +2,8 @@
2 * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE 2 * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
3 * Copyright 2003 Andi Kleen, SuSE Labs. 3 * Copyright 2003 Andi Kleen, SuSE Labs.
4 * 4 *
5 * [ NOTE: this mechanism is now deprecated in favor of the vDSO. ]
6 *
5 * Thanks to hpa@transmeta.com for some useful hint. 7 * Thanks to hpa@transmeta.com for some useful hint.
6 * Special thanks to Ingo Molnar for his early experience with 8 * Special thanks to Ingo Molnar for his early experience with
7 * a different vsyscall implementation for Linux/IA32 and for the name. 9 * a different vsyscall implementation for Linux/IA32 and for the name.
@@ -11,10 +13,9 @@
11 * vsyscalls. One vsyscall can reserve more than 1 slot to avoid 13 * vsyscalls. One vsyscall can reserve more than 1 slot to avoid
12 * jumping out of line if necessary. We cannot add more with this 14 * jumping out of line if necessary. We cannot add more with this
13 * mechanism because older kernels won't return -ENOSYS. 15 * mechanism because older kernels won't return -ENOSYS.
14 * If we want more than four we need a vDSO.
15 * 16 *
16 * Note: the concept clashes with user mode linux. If you use UML and 17 * Note: the concept clashes with user mode linux. UML users should
17 * want per guest time just set the kernel.vsyscall64 sysctl to 0. 18 * use the vDSO.
18 */ 19 */
19 20
20/* Disable profiling for userspace code: */ 21/* Disable profiling for userspace code: */
@@ -32,9 +33,12 @@
32#include <linux/cpu.h> 33#include <linux/cpu.h>
33#include <linux/smp.h> 34#include <linux/smp.h>
34#include <linux/notifier.h> 35#include <linux/notifier.h>
36#include <linux/syscalls.h>
37#include <linux/ratelimit.h>
35 38
36#include <asm/vsyscall.h> 39#include <asm/vsyscall.h>
37#include <asm/pgtable.h> 40#include <asm/pgtable.h>
41#include <asm/compat.h>
38#include <asm/page.h> 42#include <asm/page.h>
39#include <asm/unistd.h> 43#include <asm/unistd.h>
40#include <asm/fixmap.h> 44#include <asm/fixmap.h>
@@ -44,16 +48,12 @@
44#include <asm/desc.h> 48#include <asm/desc.h>
45#include <asm/topology.h> 49#include <asm/topology.h>
46#include <asm/vgtod.h> 50#include <asm/vgtod.h>
47 51#include <asm/traps.h>
48#define __vsyscall(nr) \
49 __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
50#define __syscall_clobber "r11","cx","memory"
51 52
52DEFINE_VVAR(int, vgetcpu_mode); 53DEFINE_VVAR(int, vgetcpu_mode);
53DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = 54DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
54{ 55{
55 .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), 56 .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
56 .sysctl_enabled = 1,
57}; 57};
58 58
59void update_vsyscall_tz(void) 59void update_vsyscall_tz(void)
@@ -72,179 +72,149 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
72 unsigned long flags; 72 unsigned long flags;
73 73
74 write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); 74 write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
75
75 /* copy vsyscall data */ 76 /* copy vsyscall data */
76 vsyscall_gtod_data.clock.vread = clock->vread; 77 vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode;
77 vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; 78 vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
78 vsyscall_gtod_data.clock.mask = clock->mask; 79 vsyscall_gtod_data.clock.mask = clock->mask;
79 vsyscall_gtod_data.clock.mult = mult; 80 vsyscall_gtod_data.clock.mult = mult;
80 vsyscall_gtod_data.clock.shift = clock->shift; 81 vsyscall_gtod_data.clock.shift = clock->shift;
81 vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; 82 vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
82 vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; 83 vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
83 vsyscall_gtod_data.wall_to_monotonic = *wtm; 84 vsyscall_gtod_data.wall_to_monotonic = *wtm;
84 vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); 85 vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
86
85 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); 87 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
86} 88}
87 89
88/* RED-PEN may want to readd seq locking, but then the variable should be 90static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
89 * write-once. 91 const char *message)
90 */
91static __always_inline void do_get_tz(struct timezone * tz)
92{ 92{
93 *tz = VVAR(vsyscall_gtod_data).sys_tz; 93 static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);
94} 94 struct task_struct *tsk;
95 95
96static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) 96 if (!show_unhandled_signals || !__ratelimit(&rs))
97{ 97 return;
98 int ret;
99 asm volatile("syscall"
100 : "=a" (ret)
101 : "0" (__NR_gettimeofday),"D" (tv),"S" (tz)
102 : __syscall_clobber );
103 return ret;
104}
105 98
106static __always_inline long time_syscall(long *t) 99 tsk = current;
107{
108 long secs;
109 asm volatile("syscall"
110 : "=a" (secs)
111 : "0" (__NR_time),"D" (t) : __syscall_clobber);
112 return secs;
113}
114 100
115static __always_inline void do_vgettimeofday(struct timeval * tv) 101 printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
116{ 102 level, tsk->comm, task_pid_nr(tsk),
117 cycle_t now, base, mask, cycle_delta; 103 message, regs->ip - 2, regs->cs,
118 unsigned seq; 104 regs->sp, regs->ax, regs->si, regs->di);
119 unsigned long mult, shift, nsec;
120 cycle_t (*vread)(void);
121 do {
122 seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock);
123
124 vread = VVAR(vsyscall_gtod_data).clock.vread;
125 if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled ||
126 !vread)) {
127 gettimeofday(tv,NULL);
128 return;
129 }
130
131 now = vread();
132 base = VVAR(vsyscall_gtod_data).clock.cycle_last;
133 mask = VVAR(vsyscall_gtod_data).clock.mask;
134 mult = VVAR(vsyscall_gtod_data).clock.mult;
135 shift = VVAR(vsyscall_gtod_data).clock.shift;
136
137 tv->tv_sec = VVAR(vsyscall_gtod_data).wall_time_sec;
138 nsec = VVAR(vsyscall_gtod_data).wall_time_nsec;
139 } while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq));
140
141 /* calculate interval: */
142 cycle_delta = (now - base) & mask;
143 /* convert to nsecs: */
144 nsec += (cycle_delta * mult) >> shift;
145
146 while (nsec >= NSEC_PER_SEC) {
147 tv->tv_sec += 1;
148 nsec -= NSEC_PER_SEC;
149 }
150 tv->tv_usec = nsec / NSEC_PER_USEC;
151} 105}
152 106
153int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) 107static int addr_to_vsyscall_nr(unsigned long addr)
154{ 108{
155 if (tv) 109 int nr;
156 do_vgettimeofday(tv);
157 if (tz)
158 do_get_tz(tz);
159 return 0;
160}
161 110
162/* This will break when the xtime seconds get inaccurate, but that is 111 if ((addr & ~0xC00UL) != VSYSCALL_START)
163 * unlikely */ 112 return -EINVAL;
164time_t __vsyscall(1) vtime(time_t *t)
165{
166 unsigned seq;
167 time_t result;
168 if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled))
169 return time_syscall(t);
170 113
171 do { 114 nr = (addr & 0xC00UL) >> 10;
172 seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock); 115 if (nr >= 3)
116 return -EINVAL;
173 117
174 result = VVAR(vsyscall_gtod_data).wall_time_sec; 118 return nr;
119}
175 120
176 } while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq)); 121void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
122{
123 struct task_struct *tsk;
124 unsigned long caller;
125 int vsyscall_nr;
126 long ret;
127
128 local_irq_enable();
129
130 /*
131 * Real 64-bit user mode code has cs == __USER_CS. Anything else
132 * is bogus.
133 */
134 if (regs->cs != __USER_CS) {
135 /*
136 * If we trapped from kernel mode, we might as well OOPS now
137 * instead of returning to some random address and OOPSing
138 * then.
139 */
140 BUG_ON(!user_mode(regs));
141
142 /* Compat mode and non-compat 32-bit CS should both segfault. */
143 warn_bad_vsyscall(KERN_WARNING, regs,
144 "illegal int 0xcc from 32-bit mode");
145 goto sigsegv;
146 }
177 147
178 if (t) 148 /*
179 *t = result; 149 * x86-ism here: regs->ip points to the instruction after the int 0xcc,
180 return result; 150 * and int 0xcc is two bytes long.
181} 151 */
152 vsyscall_nr = addr_to_vsyscall_nr(regs->ip - 2);
153 if (vsyscall_nr < 0) {
154 warn_bad_vsyscall(KERN_WARNING, regs,
155 "illegal int 0xcc (exploit attempt?)");
156 goto sigsegv;
157 }
182 158
183/* Fast way to get current CPU and node. 159 if (get_user(caller, (unsigned long __user *)regs->sp) != 0) {
184 This helps to do per node and per CPU caches in user space. 160 warn_bad_vsyscall(KERN_WARNING, regs, "int 0xcc with bad stack (exploit attempt?)");
185 The result is not guaranteed without CPU affinity, but usually 161 goto sigsegv;
186 works out because the scheduler tries to keep a thread on the same 162 }
187 CPU.
188 163
189 tcache must point to a two element sized long array. 164 tsk = current;
190 All arguments can be NULL. */ 165 if (seccomp_mode(&tsk->seccomp))
191long __vsyscall(2) 166 do_exit(SIGKILL);
192vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) 167
193{ 168 switch (vsyscall_nr) {
194 unsigned int p; 169 case 0:
195 unsigned long j = 0; 170 ret = sys_gettimeofday(
196 171 (struct timeval __user *)regs->di,
197 /* Fast cache - only recompute value once per jiffies and avoid 172 (struct timezone __user *)regs->si);
198 relatively costly rdtscp/cpuid otherwise. 173 break;
199 This works because the scheduler usually keeps the process 174
200 on the same CPU and this syscall doesn't guarantee its 175 case 1:
201 results anyways. 176 ret = sys_time((time_t __user *)regs->di);
202 We do this here because otherwise user space would do it on 177 break;
203 its own in a likely inferior way (no access to jiffies). 178
204 If you don't like it pass NULL. */ 179 case 2:
205 if (tcache && tcache->blob[0] == (j = VVAR(jiffies))) { 180 ret = sys_getcpu((unsigned __user *)regs->di,
206 p = tcache->blob[1]; 181 (unsigned __user *)regs->si,
207 } else if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) { 182 0);
208 /* Load per CPU data from RDTSCP */ 183 break;
209 native_read_tscp(&p);
210 } else {
211 /* Load per CPU data from GDT */
212 asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
213 } 184 }
214 if (tcache) { 185
215 tcache->blob[0] = j; 186 if (ret == -EFAULT) {
216 tcache->blob[1] = p; 187 /*
188 * Bad news -- userspace fed a bad pointer to a vsyscall.
189 *
190 * With a real vsyscall, that would have caused SIGSEGV.
191 * To make writing reliable exploits using the emulated
192 * vsyscalls harder, generate SIGSEGV here as well.
193 */
194 warn_bad_vsyscall(KERN_INFO, regs,
195 "vsyscall fault (exploit attempt?)");
196 goto sigsegv;
217 } 197 }
218 if (cpu)
219 *cpu = p & 0xfff;
220 if (node)
221 *node = p >> 12;
222 return 0;
223}
224 198
225static long __vsyscall(3) venosys_1(void) 199 regs->ax = ret;
226{
227 return -ENOSYS;
228}
229 200
230#ifdef CONFIG_SYSCTL 201 /* Emulate a ret instruction. */
231static ctl_table kernel_table2[] = { 202 regs->ip = caller;
232 { .procname = "vsyscall64", 203 regs->sp += 8;
233 .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
234 .mode = 0644,
235 .proc_handler = proc_dointvec },
236 {}
237};
238 204
239static ctl_table kernel_root_table2[] = { 205 local_irq_disable();
240 { .procname = "kernel", .mode = 0555, 206 return;
241 .child = kernel_table2 }, 207
242 {} 208sigsegv:
243}; 209 regs->ip -= 2; /* The faulting instruction should be the int 0xcc. */
244#endif 210 force_sig(SIGSEGV, current);
211 local_irq_disable();
212}
245 213
246/* Assume __initcall executes before all user space. Hopefully kmod 214/*
247 doesn't violate that. We'll find out if it does. */ 215 * Assume __initcall executes before all user space. Hopefully kmod
216 * doesn't violate that. We'll find out if it does.
217 */
248static void __cpuinit vsyscall_set_cpu(int cpu) 218static void __cpuinit vsyscall_set_cpu(int cpu)
249{ 219{
250 unsigned long d; 220 unsigned long d;
@@ -255,13 +225,15 @@ static void __cpuinit vsyscall_set_cpu(int cpu)
255 if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP)) 225 if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
256 write_rdtscp_aux((node << 12) | cpu); 226 write_rdtscp_aux((node << 12) | cpu);
257 227
258 /* Store cpu number in limit so that it can be loaded quickly 228 /*
259 in user space in vgetcpu. 229 * Store cpu number in limit so that it can be loaded quickly
260 12 bits for the CPU and 8 bits for the node. */ 230 * in user space in vgetcpu. (12 bits for the CPU and 8 bits for the node)
231 */
261 d = 0x0f40000000000ULL; 232 d = 0x0f40000000000ULL;
262 d |= cpu; 233 d |= cpu;
263 d |= (node & 0xf) << 12; 234 d |= (node & 0xf) << 12;
264 d |= (node >> 4) << 48; 235 d |= (node >> 4) << 48;
236
265 write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S); 237 write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
266} 238}
267 239
@@ -275,8 +247,10 @@ static int __cpuinit
275cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) 247cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
276{ 248{
277 long cpu = (long)arg; 249 long cpu = (long)arg;
250
278 if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) 251 if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
279 smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1); 252 smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
253
280 return NOTIFY_DONE; 254 return NOTIFY_DONE;
281} 255}
282 256
@@ -284,25 +258,23 @@ void __init map_vsyscall(void)
284{ 258{
285 extern char __vsyscall_0; 259 extern char __vsyscall_0;
286 unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0); 260 unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
261 extern char __vvar_page;
262 unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page);
287 263
288 /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */ 264 /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
289 __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); 265 __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
266 __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR);
267 BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != (unsigned long)VVAR_ADDRESS);
290} 268}
291 269
292static int __init vsyscall_init(void) 270static int __init vsyscall_init(void)
293{ 271{
294 BUG_ON(((unsigned long) &vgettimeofday != 272 BUG_ON(VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE));
295 VSYSCALL_ADDR(__NR_vgettimeofday))); 273
296 BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
297 BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
298 BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
299#ifdef CONFIG_SYSCTL
300 register_sysctl_table(kernel_root_table2);
301#endif
302 on_each_cpu(cpu_vsyscall_init, NULL, 1); 274 on_each_cpu(cpu_vsyscall_init, NULL, 1);
303 /* notifier priority > KVM */ 275 /* notifier priority > KVM */
304 hotcpu_notifier(cpu_vsyscall_notifier, 30); 276 hotcpu_notifier(cpu_vsyscall_notifier, 30);
277
305 return 0; 278 return 0;
306} 279}
307
308__initcall(vsyscall_init); 280__initcall(vsyscall_init);
diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/kernel/vsyscall_emu_64.S
new file mode 100644
index 000000000000..ffa845eae5ca
--- /dev/null
+++ b/arch/x86/kernel/vsyscall_emu_64.S
@@ -0,0 +1,27 @@
1/*
2 * vsyscall_emu_64.S: Vsyscall emulation page
3 *
4 * Copyright (c) 2011 Andy Lutomirski
5 *
6 * Subject to the GNU General Public License, version 2
7 */
8
9#include <linux/linkage.h>
10#include <asm/irq_vectors.h>
11
12/* The unused parts of the page are filled with 0xcc by the linker script. */
13
14.section .vsyscall_0, "a"
15ENTRY(vsyscall_0)
16 int $VSYSCALL_EMU_VECTOR
17END(vsyscall_0)
18
19.section .vsyscall_1, "a"
20ENTRY(vsyscall_1)
21 int $VSYSCALL_EMU_VECTOR
22END(vsyscall_1)
23
24.section .vsyscall_2, "a"
25ENTRY(vsyscall_2)
26 int $VSYSCALL_EMU_VECTOR
27END(vsyscall_2)
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 6fec2d1cebe1..01c805ba5359 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -2,6 +2,7 @@
2 2
3#include <linux/linkage.h> 3#include <linux/linkage.h>
4#include <asm/dwarf2.h> 4#include <asm/dwarf2.h>
5#include <asm/alternative-asm.h>
5 6
6 ALIGN 7 ALIGN
7copy_page_c: 8copy_page_c:
@@ -110,10 +111,6 @@ ENDPROC(copy_page)
1102: 1112:
111 .previous 112 .previous
112 .section .altinstructions,"a" 113 .section .altinstructions,"a"
113 .align 8 114 altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD, \
114 .quad copy_page 115 .Lcopy_page_end-copy_page, 2b-1b
115 .quad 1b
116 .word X86_FEATURE_REP_GOOD
117 .byte .Lcopy_page_end - copy_page
118 .byte 2b - 1b
119 .previous 116 .previous
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index d0ec9c2936d7..ee164610ec46 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -9,6 +9,7 @@
9#include <linux/linkage.h> 9#include <linux/linkage.h>
10#include <asm/dwarf2.h> 10#include <asm/dwarf2.h>
11#include <asm/cpufeature.h> 11#include <asm/cpufeature.h>
12#include <asm/alternative-asm.h>
12 13
13#undef memmove 14#undef memmove
14 15
@@ -214,11 +215,9 @@ ENTRY(memmove)
214 .previous 215 .previous
215 216
216 .section .altinstructions,"a" 217 .section .altinstructions,"a"
217 .align 8 218 altinstruction_entry .Lmemmove_begin_forward, \
218 .quad .Lmemmove_begin_forward 219 .Lmemmove_begin_forward_efs,X86_FEATURE_ERMS, \
219 .quad .Lmemmove_begin_forward_efs 220 .Lmemmove_end_forward-.Lmemmove_begin_forward, \
220 .word X86_FEATURE_ERMS 221 .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
221 .byte .Lmemmove_end_forward-.Lmemmove_begin_forward
222 .byte .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
223 .previous 222 .previous
224ENDPROC(memmove) 223ENDPROC(memmove)
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index bef0bc962400..5d179502a52c 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -26,6 +26,7 @@ targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y)
26export CPPFLAGS_vdso.lds += -P -C 26export CPPFLAGS_vdso.lds += -P -C
27 27
28VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ 28VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
29 -Wl,--no-undefined \
29 -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 30 -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
30 31
31$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so 32$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index a724905fdae7..6bc0e723b6e8 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -6,7 +6,6 @@
6 * 6 *
7 * The code should have no internal unresolved relocations. 7 * The code should have no internal unresolved relocations.
8 * Check with readelf after changing. 8 * Check with readelf after changing.
9 * Also alternative() doesn't work.
10 */ 9 */
11 10
12/* Disable profiling for userspace code: */ 11/* Disable profiling for userspace code: */
@@ -17,6 +16,7 @@
17#include <linux/time.h> 16#include <linux/time.h>
18#include <linux/string.h> 17#include <linux/string.h>
19#include <asm/vsyscall.h> 18#include <asm/vsyscall.h>
19#include <asm/fixmap.h>
20#include <asm/vgtod.h> 20#include <asm/vgtod.h>
21#include <asm/timex.h> 21#include <asm/timex.h>
22#include <asm/hpet.h> 22#include <asm/hpet.h>
@@ -25,6 +25,43 @@
25 25
26#define gtod (&VVAR(vsyscall_gtod_data)) 26#define gtod (&VVAR(vsyscall_gtod_data))
27 27
28notrace static cycle_t vread_tsc(void)
29{
30 cycle_t ret;
31 u64 last;
32
33 /*
34 * Empirically, a fence (of type that depends on the CPU)
35 * before rdtsc is enough to ensure that rdtsc is ordered
36 * with respect to loads. The various CPU manuals are unclear
37 * as to whether rdtsc can be reordered with later loads,
38 * but no one has ever seen it happen.
39 */
40 rdtsc_barrier();
41 ret = (cycle_t)vget_cycles();
42
43 last = VVAR(vsyscall_gtod_data).clock.cycle_last;
44
45 if (likely(ret >= last))
46 return ret;
47
48 /*
49 * GCC likes to generate cmov here, but this branch is extremely
50 * predictable (it's just a function of time and the likely is
51 * very likely) and there's a data dependence, so force GCC
52 * to generate a branch instead. I don't barrier() because
53 * we don't actually need a barrier, and if this function
54 * ever gets inlined it will generate worse code.
55 */
56 asm volatile ("");
57 return last;
58}
59
60static notrace cycle_t vread_hpet(void)
61{
62 return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0);
63}
64
28notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) 65notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
29{ 66{
30 long ret; 67 long ret;
@@ -36,9 +73,12 @@ notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
36notrace static inline long vgetns(void) 73notrace static inline long vgetns(void)
37{ 74{
38 long v; 75 long v;
39 cycles_t (*vread)(void); 76 cycles_t cycles;
40 vread = gtod->clock.vread; 77 if (gtod->clock.vclock_mode == VCLOCK_TSC)
41 v = (vread() - gtod->clock.cycle_last) & gtod->clock.mask; 78 cycles = vread_tsc();
79 else
80 cycles = vread_hpet();
81 v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
42 return (v * gtod->clock.mult) >> gtod->clock.shift; 82 return (v * gtod->clock.mult) >> gtod->clock.shift;
43} 83}
44 84
@@ -116,21 +156,21 @@ notrace static noinline int do_monotonic_coarse(struct timespec *ts)
116 156
117notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) 157notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
118{ 158{
119 if (likely(gtod->sysctl_enabled)) 159 switch (clock) {
120 switch (clock) { 160 case CLOCK_REALTIME:
121 case CLOCK_REALTIME: 161 if (likely(gtod->clock.vclock_mode != VCLOCK_NONE))
122 if (likely(gtod->clock.vread)) 162 return do_realtime(ts);
123 return do_realtime(ts); 163 break;
124 break; 164 case CLOCK_MONOTONIC:
125 case CLOCK_MONOTONIC: 165 if (likely(gtod->clock.vclock_mode != VCLOCK_NONE))
126 if (likely(gtod->clock.vread)) 166 return do_monotonic(ts);
127 return do_monotonic(ts); 167 break;
128 break; 168 case CLOCK_REALTIME_COARSE:
129 case CLOCK_REALTIME_COARSE: 169 return do_realtime_coarse(ts);
130 return do_realtime_coarse(ts); 170 case CLOCK_MONOTONIC_COARSE:
131 case CLOCK_MONOTONIC_COARSE: 171 return do_monotonic_coarse(ts);
132 return do_monotonic_coarse(ts); 172 }
133 } 173
134 return vdso_fallback_gettime(clock, ts); 174 return vdso_fallback_gettime(clock, ts);
135} 175}
136int clock_gettime(clockid_t, struct timespec *) 176int clock_gettime(clockid_t, struct timespec *)
@@ -139,7 +179,7 @@ int clock_gettime(clockid_t, struct timespec *)
139notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) 179notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
140{ 180{
141 long ret; 181 long ret;
142 if (likely(gtod->sysctl_enabled && gtod->clock.vread)) { 182 if (likely(gtod->clock.vclock_mode != VCLOCK_NONE)) {
143 if (likely(tv != NULL)) { 183 if (likely(tv != NULL)) {
144 BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != 184 BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
145 offsetof(struct timespec, tv_nsec) || 185 offsetof(struct timespec, tv_nsec) ||
@@ -161,27 +201,14 @@ notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
161int gettimeofday(struct timeval *, struct timezone *) 201int gettimeofday(struct timeval *, struct timezone *)
162 __attribute__((weak, alias("__vdso_gettimeofday"))); 202 __attribute__((weak, alias("__vdso_gettimeofday")));
163 203
164/* This will break when the xtime seconds get inaccurate, but that is 204/*
165 * unlikely */ 205 * This will break when the xtime seconds get inaccurate, but that is
166 206 * unlikely
167static __always_inline long time_syscall(long *t) 207 */
168{
169 long secs;
170 asm volatile("syscall"
171 : "=a" (secs)
172 : "0" (__NR_time), "D" (t) : "cc", "r11", "cx", "memory");
173 return secs;
174}
175
176notrace time_t __vdso_time(time_t *t) 208notrace time_t __vdso_time(time_t *t)
177{ 209{
178 time_t result;
179
180 if (unlikely(!VVAR(vsyscall_gtod_data).sysctl_enabled))
181 return time_syscall(t);
182
183 /* This is atomic on x86_64 so we don't need any locks. */ 210 /* This is atomic on x86_64 so we don't need any locks. */
184 result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec); 211 time_t result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec);
185 212
186 if (t) 213 if (t)
187 *t = result; 214 *t = result;
diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S
index 1d3aa6b87181..1b979c12ba85 100644
--- a/arch/x86/vdso/vdso.S
+++ b/arch/x86/vdso/vdso.S
@@ -1,10 +1,21 @@
1#include <asm/page_types.h>
2#include <linux/linkage.h>
1#include <linux/init.h> 3#include <linux/init.h>
2 4
3__INITDATA 5__PAGE_ALIGNED_DATA
4 6
5 .globl vdso_start, vdso_end 7 .globl vdso_start, vdso_end
8 .align PAGE_SIZE
6vdso_start: 9vdso_start:
7 .incbin "arch/x86/vdso/vdso.so" 10 .incbin "arch/x86/vdso/vdso.so"
8vdso_end: 11vdso_end:
9 12
10__FINIT 13.previous
14
15 .globl vdso_pages
16 .bss
17 .align 8
18 .type vdso_pages, @object
19vdso_pages:
20 .zero (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE * 8
21 .size vdso_pages, .-vdso_pages
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 7abd2be0f9b9..316fbca3490e 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -14,41 +14,61 @@
14#include <asm/vgtod.h> 14#include <asm/vgtod.h>
15#include <asm/proto.h> 15#include <asm/proto.h>
16#include <asm/vdso.h> 16#include <asm/vdso.h>
17#include <asm/page.h>
17 18
18unsigned int __read_mostly vdso_enabled = 1; 19unsigned int __read_mostly vdso_enabled = 1;
19 20
20extern char vdso_start[], vdso_end[]; 21extern char vdso_start[], vdso_end[];
21extern unsigned short vdso_sync_cpuid; 22extern unsigned short vdso_sync_cpuid;
22 23
23static struct page **vdso_pages; 24extern struct page *vdso_pages[];
24static unsigned vdso_size; 25static unsigned vdso_size;
25 26
26static int __init init_vdso_vars(void) 27static void __init patch_vdso(void *vdso, size_t len)
28{
29 Elf64_Ehdr *hdr = vdso;
30 Elf64_Shdr *sechdrs, *alt_sec = 0;
31 char *secstrings;
32 void *alt_data;
33 int i;
34
35 BUG_ON(len < sizeof(Elf64_Ehdr));
36 BUG_ON(memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0);
37
38 sechdrs = (void *)hdr + hdr->e_shoff;
39 secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
40
41 for (i = 1; i < hdr->e_shnum; i++) {
42 Elf64_Shdr *shdr = &sechdrs[i];
43 if (!strcmp(secstrings + shdr->sh_name, ".altinstructions")) {
44 alt_sec = shdr;
45 goto found;
46 }
47 }
48
49 /* If we get here, it's probably a bug. */
50 pr_warning("patch_vdso: .altinstructions not found\n");
51 return; /* nothing to patch */
52
53found:
54 alt_data = (void *)hdr + alt_sec->sh_offset;
55 apply_alternatives(alt_data, alt_data + alt_sec->sh_size);
56}
57
58static int __init init_vdso(void)
27{ 59{
28 int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE; 60 int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE;
29 int i; 61 int i;
30 62
63 patch_vdso(vdso_start, vdso_end - vdso_start);
64
31 vdso_size = npages << PAGE_SHIFT; 65 vdso_size = npages << PAGE_SHIFT;
32 vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL); 66 for (i = 0; i < npages; i++)
33 if (!vdso_pages) 67 vdso_pages[i] = virt_to_page(vdso_start + i*PAGE_SIZE);
34 goto oom;
35 for (i = 0; i < npages; i++) {
36 struct page *p;
37 p = alloc_page(GFP_KERNEL);
38 if (!p)
39 goto oom;
40 vdso_pages[i] = p;
41 copy_page(page_address(p), vdso_start + i*PAGE_SIZE);
42 }
43 68
44 return 0; 69 return 0;
45
46 oom:
47 printk("Cannot allocate vdso\n");
48 vdso_enabled = 0;
49 return -ENOMEM;
50} 70}
51subsys_initcall(init_vdso_vars); 71subsys_initcall(init_vdso);
52 72
53struct linux_binprm; 73struct linux_binprm;
54 74