diff options
-rw-r--r-- | arch/x86/include/asm/irq_vectors.h | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/traps.h | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/vsyscall.h | 12 | ||||
-rw-r--r-- | arch/x86/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 2 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/vsyscall_64.c | 261 | ||||
-rw-r--r-- | arch/x86/kernel/vsyscall_emu_64.S | 27 | ||||
-rw-r--r-- | include/linux/seccomp.h | 10 |
9 files changed, 189 insertions, 140 deletions
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 6e976ee3b3ef..a563c509edcb 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
@@ -17,7 +17,8 @@ | |||
17 | * Vectors 0 ... 31 : system traps and exceptions - hardcoded events | 17 | * Vectors 0 ... 31 : system traps and exceptions - hardcoded events |
18 | * Vectors 32 ... 127 : device interrupts | 18 | * Vectors 32 ... 127 : device interrupts |
19 | * Vector 128 : legacy int80 syscall interface | 19 | * Vector 128 : legacy int80 syscall interface |
20 | * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 : device interrupts | 20 | * Vector 204 : legacy x86_64 vsyscall emulation |
21 | * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts | ||
21 | * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts | 22 | * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts |
22 | * | 23 | * |
23 | * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. | 24 | * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. |
@@ -50,6 +51,9 @@ | |||
50 | #ifdef CONFIG_X86_32 | 51 | #ifdef CONFIG_X86_32 |
51 | # define SYSCALL_VECTOR 0x80 | 52 | # define SYSCALL_VECTOR 0x80 |
52 | #endif | 53 | #endif |
54 | #ifdef CONFIG_X86_64 | ||
55 | # define VSYSCALL_EMU_VECTOR 0xcc | ||
56 | #endif | ||
53 | 57 | ||
54 | /* | 58 | /* |
55 | * Vectors 0x30-0x3f are used for ISA interrupts. | 59 | * Vectors 0x30-0x3f are used for ISA interrupts. |
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 0310da67307f..2bae0a513b40 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _ASM_X86_TRAPS_H | 1 | #ifndef _ASM_X86_TRAPS_H |
2 | #define _ASM_X86_TRAPS_H | 2 | #define _ASM_X86_TRAPS_H |
3 | 3 | ||
4 | #include <linux/kprobes.h> | ||
5 | |||
4 | #include <asm/debugreg.h> | 6 | #include <asm/debugreg.h> |
5 | #include <asm/siginfo.h> /* TRAP_TRACE, ... */ | 7 | #include <asm/siginfo.h> /* TRAP_TRACE, ... */ |
6 | 8 | ||
@@ -38,6 +40,7 @@ asmlinkage void alignment_check(void); | |||
38 | asmlinkage void machine_check(void); | 40 | asmlinkage void machine_check(void); |
39 | #endif /* CONFIG_X86_MCE */ | 41 | #endif /* CONFIG_X86_MCE */ |
40 | asmlinkage void simd_coprocessor_error(void); | 42 | asmlinkage void simd_coprocessor_error(void); |
43 | asmlinkage void emulate_vsyscall(void); | ||
41 | 44 | ||
42 | dotraplinkage void do_divide_error(struct pt_regs *, long); | 45 | dotraplinkage void do_divide_error(struct pt_regs *, long); |
43 | dotraplinkage void do_debug(struct pt_regs *, long); | 46 | dotraplinkage void do_debug(struct pt_regs *, long); |
@@ -64,6 +67,7 @@ dotraplinkage void do_alignment_check(struct pt_regs *, long); | |||
64 | dotraplinkage void do_machine_check(struct pt_regs *, long); | 67 | dotraplinkage void do_machine_check(struct pt_regs *, long); |
65 | #endif | 68 | #endif |
66 | dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long); | 69 | dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long); |
70 | dotraplinkage void do_emulate_vsyscall(struct pt_regs *, long); | ||
67 | #ifdef CONFIG_X86_32 | 71 | #ifdef CONFIG_X86_32 |
68 | dotraplinkage void do_iret_error(struct pt_regs *, long); | 72 | dotraplinkage void do_iret_error(struct pt_regs *, long); |
69 | #endif | 73 | #endif |
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h index d55597351f6a..bb710cb0cdc1 100644 --- a/arch/x86/include/asm/vsyscall.h +++ b/arch/x86/include/asm/vsyscall.h | |||
@@ -31,6 +31,18 @@ extern struct timezone sys_tz; | |||
31 | 31 | ||
32 | extern void map_vsyscall(void); | 32 | extern void map_vsyscall(void); |
33 | 33 | ||
34 | /* Emulation */ | ||
35 | |||
36 | static inline bool is_vsyscall_entry(unsigned long addr) | ||
37 | { | ||
38 | return (addr & ~0xC00UL) == VSYSCALL_START; | ||
39 | } | ||
40 | |||
41 | static inline int vsyscall_entry_nr(unsigned long addr) | ||
42 | { | ||
43 | return (addr & 0xC00UL) >> 10; | ||
44 | } | ||
45 | |||
34 | #endif /* __KERNEL__ */ | 46 | #endif /* __KERNEL__ */ |
35 | 47 | ||
36 | #endif /* _ASM_X86_VSYSCALL_H */ | 48 | #endif /* _ASM_X86_VSYSCALL_H */ |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 90b06d4daee2..cc0469a65120 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -44,6 +44,7 @@ obj-y += probe_roms.o | |||
44 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o | 44 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o |
45 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o | 45 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o |
46 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o vread_tsc_64.o | 46 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o vread_tsc_64.o |
47 | obj-$(CONFIG_X86_64) += vsyscall_emu_64.o | ||
47 | obj-y += bootflag.o e820.o | 48 | obj-y += bootflag.o e820.o |
48 | obj-y += pci-dma.o quirks.o topology.o kdebugfs.o | 49 | obj-y += pci-dma.o quirks.o topology.o kdebugfs.o |
49 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o | 50 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 72c4a777bb91..e949793d6b93 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -1123,6 +1123,8 @@ zeroentry spurious_interrupt_bug do_spurious_interrupt_bug | |||
1123 | zeroentry coprocessor_error do_coprocessor_error | 1123 | zeroentry coprocessor_error do_coprocessor_error |
1124 | errorentry alignment_check do_alignment_check | 1124 | errorentry alignment_check do_alignment_check |
1125 | zeroentry simd_coprocessor_error do_simd_coprocessor_error | 1125 | zeroentry simd_coprocessor_error do_simd_coprocessor_error |
1126 | zeroentry emulate_vsyscall do_emulate_vsyscall | ||
1127 | |||
1126 | 1128 | ||
1127 | /* Reload gs selector with exception handling */ | 1129 | /* Reload gs selector with exception handling */ |
1128 | /* edi: new selector */ | 1130 | /* edi: new selector */ |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b9b67166f9de..fbc097a085ca 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -872,6 +872,12 @@ void __init trap_init(void) | |||
872 | set_bit(SYSCALL_VECTOR, used_vectors); | 872 | set_bit(SYSCALL_VECTOR, used_vectors); |
873 | #endif | 873 | #endif |
874 | 874 | ||
875 | #ifdef CONFIG_X86_64 | ||
876 | BUG_ON(test_bit(VSYSCALL_EMU_VECTOR, used_vectors)); | ||
877 | set_system_intr_gate(VSYSCALL_EMU_VECTOR, &emulate_vsyscall); | ||
878 | set_bit(VSYSCALL_EMU_VECTOR, used_vectors); | ||
879 | #endif | ||
880 | |||
875 | /* | 881 | /* |
876 | * Should be a barrier for any external CPU state: | 882 | * Should be a barrier for any external CPU state: |
877 | */ | 883 | */ |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 70a5f6eebd6c..10cd8ac3395a 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -2,6 +2,8 @@ | |||
2 | * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE | 2 | * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE |
3 | * Copyright 2003 Andi Kleen, SuSE Labs. | 3 | * Copyright 2003 Andi Kleen, SuSE Labs. |
4 | * | 4 | * |
5 | * [ NOTE: this mechanism is now deprecated in favor of the vDSO. ] | ||
6 | * | ||
5 | * Thanks to hpa@transmeta.com for some useful hint. | 7 | * Thanks to hpa@transmeta.com for some useful hint. |
6 | * Special thanks to Ingo Molnar for his early experience with | 8 | * Special thanks to Ingo Molnar for his early experience with |
7 | * a different vsyscall implementation for Linux/IA32 and for the name. | 9 | * a different vsyscall implementation for Linux/IA32 and for the name. |
@@ -11,10 +13,9 @@ | |||
11 | * vsyscalls. One vsyscall can reserve more than 1 slot to avoid | 13 | * vsyscalls. One vsyscall can reserve more than 1 slot to avoid |
12 | * jumping out of line if necessary. We cannot add more with this | 14 | * jumping out of line if necessary. We cannot add more with this |
13 | * mechanism because older kernels won't return -ENOSYS. | 15 | * mechanism because older kernels won't return -ENOSYS. |
14 | * If we want more than four we need a vDSO. | ||
15 | * | 16 | * |
16 | * Note: the concept clashes with user mode linux. If you use UML and | 17 | * Note: the concept clashes with user mode linux. UML users should |
17 | * want per guest time just set the kernel.vsyscall64 sysctl to 0. | 18 | * use the vDSO. |
18 | */ | 19 | */ |
19 | 20 | ||
20 | /* Disable profiling for userspace code: */ | 21 | /* Disable profiling for userspace code: */ |
@@ -32,6 +33,8 @@ | |||
32 | #include <linux/cpu.h> | 33 | #include <linux/cpu.h> |
33 | #include <linux/smp.h> | 34 | #include <linux/smp.h> |
34 | #include <linux/notifier.h> | 35 | #include <linux/notifier.h> |
36 | #include <linux/syscalls.h> | ||
37 | #include <linux/ratelimit.h> | ||
35 | 38 | ||
36 | #include <asm/vsyscall.h> | 39 | #include <asm/vsyscall.h> |
37 | #include <asm/pgtable.h> | 40 | #include <asm/pgtable.h> |
@@ -44,10 +47,7 @@ | |||
44 | #include <asm/desc.h> | 47 | #include <asm/desc.h> |
45 | #include <asm/topology.h> | 48 | #include <asm/topology.h> |
46 | #include <asm/vgtod.h> | 49 | #include <asm/vgtod.h> |
47 | 50 | #include <asm/traps.h> | |
48 | #define __vsyscall(nr) \ | ||
49 | __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace | ||
50 | #define __syscall_clobber "r11","cx","memory" | ||
51 | 51 | ||
52 | DEFINE_VVAR(int, vgetcpu_mode); | 52 | DEFINE_VVAR(int, vgetcpu_mode); |
53 | DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = | 53 | DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = |
@@ -71,146 +71,129 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm, | |||
71 | unsigned long flags; | 71 | unsigned long flags; |
72 | 72 | ||
73 | write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); | 73 | write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); |
74 | |||
74 | /* copy vsyscall data */ | 75 | /* copy vsyscall data */ |
75 | vsyscall_gtod_data.clock.vread = clock->vread; | 76 | vsyscall_gtod_data.clock.vread = clock->vread; |
76 | vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; | 77 | vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; |
77 | vsyscall_gtod_data.clock.mask = clock->mask; | 78 | vsyscall_gtod_data.clock.mask = clock->mask; |
78 | vsyscall_gtod_data.clock.mult = mult; | 79 | vsyscall_gtod_data.clock.mult = mult; |
79 | vsyscall_gtod_data.clock.shift = clock->shift; | 80 | vsyscall_gtod_data.clock.shift = clock->shift; |
80 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; | 81 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; |
81 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; | 82 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; |
82 | vsyscall_gtod_data.wall_to_monotonic = *wtm; | 83 | vsyscall_gtod_data.wall_to_monotonic = *wtm; |
83 | vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); | 84 | vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); |
85 | |||
84 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); | 86 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); |
85 | } | 87 | } |
86 | 88 | ||
87 | /* RED-PEN may want to readd seq locking, but then the variable should be | 89 | static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, |
88 | * write-once. | 90 | const char *message) |
89 | */ | ||
90 | static __always_inline void do_get_tz(struct timezone * tz) | ||
91 | { | 91 | { |
92 | *tz = VVAR(vsyscall_gtod_data).sys_tz; | 92 | static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); |
93 | } | 93 | struct task_struct *tsk; |
94 | 94 | ||
95 | static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) | 95 | if (!show_unhandled_signals || !__ratelimit(&rs)) |
96 | { | 96 | return; |
97 | int ret; | ||
98 | asm volatile("syscall" | ||
99 | : "=a" (ret) | ||
100 | : "0" (__NR_gettimeofday),"D" (tv),"S" (tz) | ||
101 | : __syscall_clobber ); | ||
102 | return ret; | ||
103 | } | ||
104 | 97 | ||
105 | static __always_inline void do_vgettimeofday(struct timeval * tv) | 98 | tsk = current; |
106 | { | ||
107 | cycle_t now, base, mask, cycle_delta; | ||
108 | unsigned seq; | ||
109 | unsigned long mult, shift, nsec; | ||
110 | cycle_t (*vread)(void); | ||
111 | do { | ||
112 | seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock); | ||
113 | |||
114 | vread = VVAR(vsyscall_gtod_data).clock.vread; | ||
115 | if (unlikely(!vread)) { | ||
116 | gettimeofday(tv,NULL); | ||
117 | return; | ||
118 | } | ||
119 | |||
120 | now = vread(); | ||
121 | base = VVAR(vsyscall_gtod_data).clock.cycle_last; | ||
122 | mask = VVAR(vsyscall_gtod_data).clock.mask; | ||
123 | mult = VVAR(vsyscall_gtod_data).clock.mult; | ||
124 | shift = VVAR(vsyscall_gtod_data).clock.shift; | ||
125 | |||
126 | tv->tv_sec = VVAR(vsyscall_gtod_data).wall_time_sec; | ||
127 | nsec = VVAR(vsyscall_gtod_data).wall_time_nsec; | ||
128 | } while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq)); | ||
129 | |||
130 | /* calculate interval: */ | ||
131 | cycle_delta = (now - base) & mask; | ||
132 | /* convert to nsecs: */ | ||
133 | nsec += (cycle_delta * mult) >> shift; | ||
134 | |||
135 | while (nsec >= NSEC_PER_SEC) { | ||
136 | tv->tv_sec += 1; | ||
137 | nsec -= NSEC_PER_SEC; | ||
138 | } | ||
139 | tv->tv_usec = nsec / NSEC_PER_USEC; | ||
140 | } | ||
141 | 99 | ||
142 | int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) | 100 | printk("%s%s[%d] %s ip:%lx sp:%lx ax:%lx si:%lx di:%lx\n", |
143 | { | 101 | level, tsk->comm, task_pid_nr(tsk), |
144 | if (tv) | 102 | message, regs->ip - 2, regs->sp, regs->ax, regs->si, regs->di); |
145 | do_vgettimeofday(tv); | ||
146 | if (tz) | ||
147 | do_get_tz(tz); | ||
148 | return 0; | ||
149 | } | 103 | } |
150 | 104 | ||
151 | /* This will break when the xtime seconds get inaccurate, but that is | 105 | void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code) |
152 | * unlikely */ | ||
153 | time_t __vsyscall(1) vtime(time_t *t) | ||
154 | { | 106 | { |
155 | unsigned seq; | 107 | const char *vsyscall_name; |
156 | time_t result; | 108 | struct task_struct *tsk; |
109 | unsigned long caller; | ||
110 | int vsyscall_nr; | ||
111 | long ret; | ||
112 | |||
113 | /* Kernel code must never get here. */ | ||
114 | BUG_ON(!user_mode(regs)); | ||
115 | |||
116 | local_irq_enable(); | ||
117 | |||
118 | /* | ||
119 | * x86-ism here: regs->ip points to the instruction after the int 0xcc, | ||
120 | * and int 0xcc is two bytes long. | ||
121 | */ | ||
122 | if (!is_vsyscall_entry(regs->ip - 2)) { | ||
123 | warn_bad_vsyscall(KERN_WARNING, regs, "illegal int 0xcc (exploit attempt?)"); | ||
124 | goto sigsegv; | ||
125 | } | ||
126 | vsyscall_nr = vsyscall_entry_nr(regs->ip - 2); | ||
157 | 127 | ||
158 | do { | 128 | if (get_user(caller, (unsigned long __user *)regs->sp) != 0) { |
159 | seq = read_seqbegin(&VVAR(vsyscall_gtod_data).lock); | 129 | warn_bad_vsyscall(KERN_WARNING, regs, "int 0xcc with bad stack (exploit attempt?)"); |
130 | goto sigsegv; | ||
131 | } | ||
160 | 132 | ||
161 | result = VVAR(vsyscall_gtod_data).wall_time_sec; | 133 | tsk = current; |
134 | if (seccomp_mode(&tsk->seccomp)) | ||
135 | do_exit(SIGKILL); | ||
136 | |||
137 | switch (vsyscall_nr) { | ||
138 | case 0: | ||
139 | vsyscall_name = "gettimeofday"; | ||
140 | ret = sys_gettimeofday( | ||
141 | (struct timeval __user *)regs->di, | ||
142 | (struct timezone __user *)regs->si); | ||
143 | break; | ||
144 | |||
145 | case 1: | ||
146 | vsyscall_name = "time"; | ||
147 | ret = sys_time((time_t __user *)regs->di); | ||
148 | break; | ||
149 | |||
150 | case 2: | ||
151 | vsyscall_name = "getcpu"; | ||
152 | ret = sys_getcpu((unsigned __user *)regs->di, | ||
153 | (unsigned __user *)regs->si, | ||
154 | 0); | ||
155 | break; | ||
156 | |||
157 | default: | ||
158 | /* | ||
159 | * If we get here, then vsyscall_nr indicates that int 0xcc | ||
160 | * happened at an address in the vsyscall page that doesn't | ||
161 | * contain int 0xcc. That can't happen. | ||
162 | */ | ||
163 | BUG(); | ||
164 | } | ||
162 | 165 | ||
163 | } while (read_seqretry(&VVAR(vsyscall_gtod_data).lock, seq)); | 166 | if (ret == -EFAULT) { |
167 | /* | ||
168 | * Bad news -- userspace fed a bad pointer to a vsyscall. | ||
169 | * | ||
170 | * With a real vsyscall, that would have caused SIGSEGV. | ||
171 | * To make writing reliable exploits using the emulated | ||
172 | * vsyscalls harder, generate SIGSEGV here as well. | ||
173 | */ | ||
174 | warn_bad_vsyscall(KERN_INFO, regs, | ||
175 | "vsyscall fault (exploit attempt?)"); | ||
176 | goto sigsegv; | ||
177 | } | ||
164 | 178 | ||
165 | if (t) | 179 | regs->ax = ret; |
166 | *t = result; | ||
167 | return result; | ||
168 | } | ||
169 | 180 | ||
170 | /* Fast way to get current CPU and node. | 181 | /* Emulate a ret instruction. */ |
171 | This helps to do per node and per CPU caches in user space. | 182 | regs->ip = caller; |
172 | The result is not guaranteed without CPU affinity, but usually | 183 | regs->sp += 8; |
173 | works out because the scheduler tries to keep a thread on the same | ||
174 | CPU. | ||
175 | 184 | ||
176 | tcache must point to a two element sized long array. | 185 | local_irq_disable(); |
177 | All arguments can be NULL. */ | 186 | return; |
178 | long __vsyscall(2) | 187 | |
179 | vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) | 188 | sigsegv: |
180 | { | 189 | regs->ip -= 2; /* The faulting instruction should be the int 0xcc. */ |
181 | unsigned int p; | 190 | force_sig(SIGSEGV, current); |
182 | unsigned long j = 0; | ||
183 | |||
184 | /* Fast cache - only recompute value once per jiffies and avoid | ||
185 | relatively costly rdtscp/cpuid otherwise. | ||
186 | This works because the scheduler usually keeps the process | ||
187 | on the same CPU and this syscall doesn't guarantee its | ||
188 | results anyways. | ||
189 | We do this here because otherwise user space would do it on | ||
190 | its own in a likely inferior way (no access to jiffies). | ||
191 | If you don't like it pass NULL. */ | ||
192 | if (tcache && tcache->blob[0] == (j = VVAR(jiffies))) { | ||
193 | p = tcache->blob[1]; | ||
194 | } else if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) { | ||
195 | /* Load per CPU data from RDTSCP */ | ||
196 | native_read_tscp(&p); | ||
197 | } else { | ||
198 | /* Load per CPU data from GDT */ | ||
199 | asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); | ||
200 | } | ||
201 | if (tcache) { | ||
202 | tcache->blob[0] = j; | ||
203 | tcache->blob[1] = p; | ||
204 | } | ||
205 | if (cpu) | ||
206 | *cpu = p & 0xfff; | ||
207 | if (node) | ||
208 | *node = p >> 12; | ||
209 | return 0; | ||
210 | } | 191 | } |
211 | 192 | ||
212 | /* Assume __initcall executes before all user space. Hopefully kmod | 193 | /* |
213 | doesn't violate that. We'll find out if it does. */ | 194 | * Assume __initcall executes before all user space. Hopefully kmod |
195 | * doesn't violate that. We'll find out if it does. | ||
196 | */ | ||
214 | static void __cpuinit vsyscall_set_cpu(int cpu) | 197 | static void __cpuinit vsyscall_set_cpu(int cpu) |
215 | { | 198 | { |
216 | unsigned long d; | 199 | unsigned long d; |
@@ -221,13 +204,15 @@ static void __cpuinit vsyscall_set_cpu(int cpu) | |||
221 | if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP)) | 204 | if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP)) |
222 | write_rdtscp_aux((node << 12) | cpu); | 205 | write_rdtscp_aux((node << 12) | cpu); |
223 | 206 | ||
224 | /* Store cpu number in limit so that it can be loaded quickly | 207 | /* |
225 | in user space in vgetcpu. | 208 | * Store cpu number in limit so that it can be loaded quickly |
226 | 12 bits for the CPU and 8 bits for the node. */ | 209 | * in user space in vgetcpu. (12 bits for the CPU and 8 bits for the node) |
210 | */ | ||
227 | d = 0x0f40000000000ULL; | 211 | d = 0x0f40000000000ULL; |
228 | d |= cpu; | 212 | d |= cpu; |
229 | d |= (node & 0xf) << 12; | 213 | d |= (node & 0xf) << 12; |
230 | d |= (node >> 4) << 48; | 214 | d |= (node >> 4) << 48; |
215 | |||
231 | write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S); | 216 | write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S); |
232 | } | 217 | } |
233 | 218 | ||
@@ -241,8 +226,10 @@ static int __cpuinit | |||
241 | cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) | 226 | cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) |
242 | { | 227 | { |
243 | long cpu = (long)arg; | 228 | long cpu = (long)arg; |
229 | |||
244 | if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) | 230 | if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) |
245 | smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1); | 231 | smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1); |
232 | |||
246 | return NOTIFY_DONE; | 233 | return NOTIFY_DONE; |
247 | } | 234 | } |
248 | 235 | ||
@@ -256,21 +243,17 @@ void __init map_vsyscall(void) | |||
256 | /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */ | 243 | /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */ |
257 | __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); | 244 | __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL); |
258 | __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR); | 245 | __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR); |
259 | BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != | 246 | BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != (unsigned long)VVAR_ADDRESS); |
260 | (unsigned long)VVAR_ADDRESS); | ||
261 | } | 247 | } |
262 | 248 | ||
263 | static int __init vsyscall_init(void) | 249 | static int __init vsyscall_init(void) |
264 | { | 250 | { |
265 | BUG_ON(((unsigned long) &vgettimeofday != | 251 | BUG_ON(VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)); |
266 | VSYSCALL_ADDR(__NR_vgettimeofday))); | 252 | |
267 | BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); | ||
268 | BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); | ||
269 | BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu)); | ||
270 | on_each_cpu(cpu_vsyscall_init, NULL, 1); | 253 | on_each_cpu(cpu_vsyscall_init, NULL, 1); |
271 | /* notifier priority > KVM */ | 254 | /* notifier priority > KVM */ |
272 | hotcpu_notifier(cpu_vsyscall_notifier, 30); | 255 | hotcpu_notifier(cpu_vsyscall_notifier, 30); |
256 | |||
273 | return 0; | 257 | return 0; |
274 | } | 258 | } |
275 | |||
276 | __initcall(vsyscall_init); | 259 | __initcall(vsyscall_init); |
diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/kernel/vsyscall_emu_64.S new file mode 100644 index 000000000000..ffa845eae5ca --- /dev/null +++ b/arch/x86/kernel/vsyscall_emu_64.S | |||
@@ -0,0 +1,27 @@ | |||
1 | /* | ||
2 | * vsyscall_emu_64.S: Vsyscall emulation page | ||
3 | * | ||
4 | * Copyright (c) 2011 Andy Lutomirski | ||
5 | * | ||
6 | * Subject to the GNU General Public License, version 2 | ||
7 | */ | ||
8 | |||
9 | #include <linux/linkage.h> | ||
10 | #include <asm/irq_vectors.h> | ||
11 | |||
12 | /* The unused parts of the page are filled with 0xcc by the linker script. */ | ||
13 | |||
14 | .section .vsyscall_0, "a" | ||
15 | ENTRY(vsyscall_0) | ||
16 | int $VSYSCALL_EMU_VECTOR | ||
17 | END(vsyscall_0) | ||
18 | |||
19 | .section .vsyscall_1, "a" | ||
20 | ENTRY(vsyscall_1) | ||
21 | int $VSYSCALL_EMU_VECTOR | ||
22 | END(vsyscall_1) | ||
23 | |||
24 | .section .vsyscall_2, "a" | ||
25 | ENTRY(vsyscall_2) | ||
26 | int $VSYSCALL_EMU_VECTOR | ||
27 | END(vsyscall_2) | ||
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 167c33361d9c..cc7a4e9cc7ad 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h | |||
@@ -19,6 +19,11 @@ static inline void secure_computing(int this_syscall) | |||
19 | extern long prctl_get_seccomp(void); | 19 | extern long prctl_get_seccomp(void); |
20 | extern long prctl_set_seccomp(unsigned long); | 20 | extern long prctl_set_seccomp(unsigned long); |
21 | 21 | ||
22 | static inline int seccomp_mode(seccomp_t *s) | ||
23 | { | ||
24 | return s->mode; | ||
25 | } | ||
26 | |||
22 | #else /* CONFIG_SECCOMP */ | 27 | #else /* CONFIG_SECCOMP */ |
23 | 28 | ||
24 | #include <linux/errno.h> | 29 | #include <linux/errno.h> |
@@ -37,6 +42,11 @@ static inline long prctl_set_seccomp(unsigned long arg2) | |||
37 | return -EINVAL; | 42 | return -EINVAL; |
38 | } | 43 | } |
39 | 44 | ||
45 | static inline int seccomp_mode(seccomp_t *s) | ||
46 | { | ||
47 | return 0; | ||
48 | } | ||
49 | |||
40 | #endif /* CONFIG_SECCOMP */ | 50 | #endif /* CONFIG_SECCOMP */ |
41 | 51 | ||
42 | #endif /* _LINUX_SECCOMP_H */ | 52 | #endif /* _LINUX_SECCOMP_H */ |