diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-06 16:59:14 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-06 16:59:14 -0500 |
commit | 69734b644bf19f20d2989e1a8e5bf59c837ee5c1 (patch) | |
tree | b1afd22d6e84db04516e466c223d67c1c340e6d9 /arch/x86/kernel | |
parent | 67b0243131150391125d8d0beb5359d7aec78b55 (diff) | |
parent | ceb7b40b65539a771d1bfaf47660ac0ee57e0c4f (diff) |
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
x86: Fix atomic64_xxx_cx8() functions
x86: Fix and improve cmpxchg_double{,_local}()
x86_64, asm: Optimise fls(), ffs() and fls64()
x86, bitops: Move fls64.h inside __KERNEL__
x86: Fix and improve percpu_cmpxchg{8,16}b_double()
x86: Report cpb and eff_freq_ro flags correctly
x86/i386: Use less assembly in strlen(), speed things up a bit
x86: Use the same node_distance for 32 and 64-bit
x86: Fix rflags in FAKE_STACK_FRAME
x86: Clean up and extend do_int3()
x86: Call do_notify_resume() with interrupts enabled
x86/div64: Add a micro-optimization shortcut if base is power of two
x86-64: Cleanup some assembly entry points
x86-64: Slightly shorten line system call entry and exit paths
x86-64: Reduce amount of redundant code generated for invalidate_interruptNN
x86-64: Slightly shorten int_ret_from_sys_call
x86, efi: Convert efi_phys_get_time() args to physical addresses
x86: Default to vsyscall=emulate
x86-64: Set siginfo and context on vsyscall emulation faults
x86: consolidate xchg and xadd macros
...
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/cpu/powerflags.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/entry_32.S | 4 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 31 | ||||
-rw-r--r-- | arch/x86/kernel/process.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 7 | ||||
-rw-r--r-- | arch/x86/kernel/vsyscall_64.c | 77 |
6 files changed, 94 insertions, 30 deletions
diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c index 5abbea297e0c..7b3fe56b1c21 100644 --- a/arch/x86/kernel/cpu/powerflags.c +++ b/arch/x86/kernel/cpu/powerflags.c | |||
@@ -16,5 +16,6 @@ const char *const x86_power_flags[32] = { | |||
16 | "100mhzsteps", | 16 | "100mhzsteps", |
17 | "hwpstate", | 17 | "hwpstate", |
18 | "", /* tsc invariant mapped to constant_tsc */ | 18 | "", /* tsc invariant mapped to constant_tsc */ |
19 | /* nothing */ | 19 | "cpb", /* core performance boost */ |
20 | "eff_freq_ro", /* Readonly aperf/mperf */ | ||
20 | }; | 21 | }; |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index f3f6f5344001..22d0e21b4dd7 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -625,6 +625,8 @@ work_notifysig: # deal with pending signals and | |||
625 | movl %esp, %eax | 625 | movl %esp, %eax |
626 | jne work_notifysig_v86 # returning to kernel-space or | 626 | jne work_notifysig_v86 # returning to kernel-space or |
627 | # vm86-space | 627 | # vm86-space |
628 | TRACE_IRQS_ON | ||
629 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
628 | xorl %edx, %edx | 630 | xorl %edx, %edx |
629 | call do_notify_resume | 631 | call do_notify_resume |
630 | jmp resume_userspace_sig | 632 | jmp resume_userspace_sig |
@@ -638,6 +640,8 @@ work_notifysig_v86: | |||
638 | #else | 640 | #else |
639 | movl %esp, %eax | 641 | movl %esp, %eax |
640 | #endif | 642 | #endif |
643 | TRACE_IRQS_ON | ||
644 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
641 | xorl %edx, %edx | 645 | xorl %edx, %edx |
642 | call do_notify_resume | 646 | call do_notify_resume |
643 | jmp resume_userspace_sig | 647 | jmp resume_userspace_sig |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index faf8d5e74b0b..a20e1cb9dc87 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -221,7 +221,7 @@ ENDPROC(native_usergs_sysret64) | |||
221 | /*CFI_REL_OFFSET ss,0*/ | 221 | /*CFI_REL_OFFSET ss,0*/ |
222 | pushq_cfi %rax /* rsp */ | 222 | pushq_cfi %rax /* rsp */ |
223 | CFI_REL_OFFSET rsp,0 | 223 | CFI_REL_OFFSET rsp,0 |
224 | pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */ | 224 | pushq_cfi $(X86_EFLAGS_IF|X86_EFLAGS_BIT1) /* eflags - interrupts on */ |
225 | /*CFI_REL_OFFSET rflags,0*/ | 225 | /*CFI_REL_OFFSET rflags,0*/ |
226 | pushq_cfi $__KERNEL_CS /* cs */ | 226 | pushq_cfi $__KERNEL_CS /* cs */ |
227 | /*CFI_REL_OFFSET cs,0*/ | 227 | /*CFI_REL_OFFSET cs,0*/ |
@@ -411,7 +411,7 @@ ENTRY(ret_from_fork) | |||
411 | RESTORE_REST | 411 | RESTORE_REST |
412 | 412 | ||
413 | testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? | 413 | testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? |
414 | je int_ret_from_sys_call | 414 | jz retint_restore_args |
415 | 415 | ||
416 | testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET | 416 | testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET |
417 | jnz int_ret_from_sys_call | 417 | jnz int_ret_from_sys_call |
@@ -465,7 +465,7 @@ ENTRY(system_call) | |||
465 | * after the swapgs, so that it can do the swapgs | 465 | * after the swapgs, so that it can do the swapgs |
466 | * for the guest and jump here on syscall. | 466 | * for the guest and jump here on syscall. |
467 | */ | 467 | */ |
468 | ENTRY(system_call_after_swapgs) | 468 | GLOBAL(system_call_after_swapgs) |
469 | 469 | ||
470 | movq %rsp,PER_CPU_VAR(old_rsp) | 470 | movq %rsp,PER_CPU_VAR(old_rsp) |
471 | movq PER_CPU_VAR(kernel_stack),%rsp | 471 | movq PER_CPU_VAR(kernel_stack),%rsp |
@@ -478,8 +478,7 @@ ENTRY(system_call_after_swapgs) | |||
478 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | 478 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) |
479 | movq %rcx,RIP-ARGOFFSET(%rsp) | 479 | movq %rcx,RIP-ARGOFFSET(%rsp) |
480 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 480 | CFI_REL_OFFSET rip,RIP-ARGOFFSET |
481 | GET_THREAD_INFO(%rcx) | 481 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
482 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx) | ||
483 | jnz tracesys | 482 | jnz tracesys |
484 | system_call_fastpath: | 483 | system_call_fastpath: |
485 | cmpq $__NR_syscall_max,%rax | 484 | cmpq $__NR_syscall_max,%rax |
@@ -496,10 +495,9 @@ ret_from_sys_call: | |||
496 | /* edi: flagmask */ | 495 | /* edi: flagmask */ |
497 | sysret_check: | 496 | sysret_check: |
498 | LOCKDEP_SYS_EXIT | 497 | LOCKDEP_SYS_EXIT |
499 | GET_THREAD_INFO(%rcx) | ||
500 | DISABLE_INTERRUPTS(CLBR_NONE) | 498 | DISABLE_INTERRUPTS(CLBR_NONE) |
501 | TRACE_IRQS_OFF | 499 | TRACE_IRQS_OFF |
502 | movl TI_flags(%rcx),%edx | 500 | movl TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET),%edx |
503 | andl %edi,%edx | 501 | andl %edi,%edx |
504 | jnz sysret_careful | 502 | jnz sysret_careful |
505 | CFI_REMEMBER_STATE | 503 | CFI_REMEMBER_STATE |
@@ -583,7 +581,7 @@ sysret_audit: | |||
583 | /* Do syscall tracing */ | 581 | /* Do syscall tracing */ |
584 | tracesys: | 582 | tracesys: |
585 | #ifdef CONFIG_AUDITSYSCALL | 583 | #ifdef CONFIG_AUDITSYSCALL |
586 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) | 584 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) |
587 | jz auditsys | 585 | jz auditsys |
588 | #endif | 586 | #endif |
589 | SAVE_REST | 587 | SAVE_REST |
@@ -612,8 +610,6 @@ tracesys: | |||
612 | GLOBAL(int_ret_from_sys_call) | 610 | GLOBAL(int_ret_from_sys_call) |
613 | DISABLE_INTERRUPTS(CLBR_NONE) | 611 | DISABLE_INTERRUPTS(CLBR_NONE) |
614 | TRACE_IRQS_OFF | 612 | TRACE_IRQS_OFF |
615 | testl $3,CS-ARGOFFSET(%rsp) | ||
616 | je retint_restore_args | ||
617 | movl $_TIF_ALLWORK_MASK,%edi | 613 | movl $_TIF_ALLWORK_MASK,%edi |
618 | /* edi: mask to check */ | 614 | /* edi: mask to check */ |
619 | GLOBAL(int_with_check) | 615 | GLOBAL(int_with_check) |
@@ -953,6 +949,7 @@ END(common_interrupt) | |||
953 | ENTRY(\sym) | 949 | ENTRY(\sym) |
954 | INTR_FRAME | 950 | INTR_FRAME |
955 | pushq_cfi $~(\num) | 951 | pushq_cfi $~(\num) |
952 | .Lcommon_\sym: | ||
956 | interrupt \do_sym | 953 | interrupt \do_sym |
957 | jmp ret_from_intr | 954 | jmp ret_from_intr |
958 | CFI_ENDPROC | 955 | CFI_ENDPROC |
@@ -976,13 +973,21 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \ | |||
976 | x86_platform_ipi smp_x86_platform_ipi | 973 | x86_platform_ipi smp_x86_platform_ipi |
977 | 974 | ||
978 | #ifdef CONFIG_SMP | 975 | #ifdef CONFIG_SMP |
979 | .irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ | 976 | ALIGN |
977 | INTR_FRAME | ||
978 | .irp idx,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ | ||
980 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 | 979 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 |
981 | .if NUM_INVALIDATE_TLB_VECTORS > \idx | 980 | .if NUM_INVALIDATE_TLB_VECTORS > \idx |
982 | apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \ | 981 | ENTRY(invalidate_interrupt\idx) |
983 | invalidate_interrupt\idx smp_invalidate_interrupt | 982 | pushq_cfi $~(INVALIDATE_TLB_VECTOR_START+\idx) |
983 | jmp .Lcommon_invalidate_interrupt0 | ||
984 | CFI_ADJUST_CFA_OFFSET -8 | ||
985 | END(invalidate_interrupt\idx) | ||
984 | .endif | 986 | .endif |
985 | .endr | 987 | .endr |
988 | CFI_ENDPROC | ||
989 | apicinterrupt INVALIDATE_TLB_VECTOR_START, \ | ||
990 | invalidate_interrupt0, smp_invalidate_interrupt | ||
986 | #endif | 991 | #endif |
987 | 992 | ||
988 | apicinterrupt THRESHOLD_APIC_VECTOR \ | 993 | apicinterrupt THRESHOLD_APIC_VECTOR \ |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ee5d4fbd53b4..15763af7bfe3 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -293,7 +293,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) | |||
293 | regs.orig_ax = -1; | 293 | regs.orig_ax = -1; |
294 | regs.ip = (unsigned long) kernel_thread_helper; | 294 | regs.ip = (unsigned long) kernel_thread_helper; |
295 | regs.cs = __KERNEL_CS | get_kernel_rpl(); | 295 | regs.cs = __KERNEL_CS | get_kernel_rpl(); |
296 | regs.flags = X86_EFLAGS_IF | 0x2; | 296 | regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1; |
297 | 297 | ||
298 | /* Ok, create the new process.. */ | 298 | /* Ok, create the new process.. */ |
299 | return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); | 299 | return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a8e3eb83466c..fa1191fb679d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -306,15 +306,10 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) | |||
306 | == NOTIFY_STOP) | 306 | == NOTIFY_STOP) |
307 | return; | 307 | return; |
308 | #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ | 308 | #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ |
309 | #ifdef CONFIG_KPROBES | 309 | |
310 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) | 310 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) |
311 | == NOTIFY_STOP) | 311 | == NOTIFY_STOP) |
312 | return; | 312 | return; |
313 | #else | ||
314 | if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP) | ||
315 | == NOTIFY_STOP) | ||
316 | return; | ||
317 | #endif | ||
318 | 313 | ||
319 | preempt_conditional_sti(regs); | 314 | preempt_conditional_sti(regs); |
320 | do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); | 315 | do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index e4d4a22e8b94..b07ba9393564 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -57,7 +57,7 @@ DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) = | |||
57 | .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), | 57 | .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), |
58 | }; | 58 | }; |
59 | 59 | ||
60 | static enum { EMULATE, NATIVE, NONE } vsyscall_mode = NATIVE; | 60 | static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; |
61 | 61 | ||
62 | static int __init vsyscall_setup(char *str) | 62 | static int __init vsyscall_setup(char *str) |
63 | { | 63 | { |
@@ -140,11 +140,40 @@ static int addr_to_vsyscall_nr(unsigned long addr) | |||
140 | return nr; | 140 | return nr; |
141 | } | 141 | } |
142 | 142 | ||
143 | static bool write_ok_or_segv(unsigned long ptr, size_t size) | ||
144 | { | ||
145 | /* | ||
146 | * XXX: if access_ok, get_user, and put_user handled | ||
147 | * sig_on_uaccess_error, this could go away. | ||
148 | */ | ||
149 | |||
150 | if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) { | ||
151 | siginfo_t info; | ||
152 | struct thread_struct *thread = &current->thread; | ||
153 | |||
154 | thread->error_code = 6; /* user fault, no page, write */ | ||
155 | thread->cr2 = ptr; | ||
156 | thread->trap_no = 14; | ||
157 | |||
158 | memset(&info, 0, sizeof(info)); | ||
159 | info.si_signo = SIGSEGV; | ||
160 | info.si_errno = 0; | ||
161 | info.si_code = SEGV_MAPERR; | ||
162 | info.si_addr = (void __user *)ptr; | ||
163 | |||
164 | force_sig_info(SIGSEGV, &info, current); | ||
165 | return false; | ||
166 | } else { | ||
167 | return true; | ||
168 | } | ||
169 | } | ||
170 | |||
143 | bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | 171 | bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) |
144 | { | 172 | { |
145 | struct task_struct *tsk; | 173 | struct task_struct *tsk; |
146 | unsigned long caller; | 174 | unsigned long caller; |
147 | int vsyscall_nr; | 175 | int vsyscall_nr; |
176 | int prev_sig_on_uaccess_error; | ||
148 | long ret; | 177 | long ret; |
149 | 178 | ||
150 | /* | 179 | /* |
@@ -180,35 +209,65 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) | |||
180 | if (seccomp_mode(&tsk->seccomp)) | 209 | if (seccomp_mode(&tsk->seccomp)) |
181 | do_exit(SIGKILL); | 210 | do_exit(SIGKILL); |
182 | 211 | ||
212 | /* | ||
213 | * With a real vsyscall, page faults cause SIGSEGV. We want to | ||
214 | * preserve that behavior to make writing exploits harder. | ||
215 | */ | ||
216 | prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error; | ||
217 | current_thread_info()->sig_on_uaccess_error = 1; | ||
218 | |||
219 | /* | ||
220 | * 0 is a valid user pointer (in the access_ok sense) on 32-bit and | ||
221 | * 64-bit, so we don't need to special-case it here. For all the | ||
222 | * vsyscalls, 0 means "don't write anything" not "write it at | ||
223 | * address 0". | ||
224 | */ | ||
225 | ret = -EFAULT; | ||
183 | switch (vsyscall_nr) { | 226 | switch (vsyscall_nr) { |
184 | case 0: | 227 | case 0: |
228 | if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) || | ||
229 | !write_ok_or_segv(regs->si, sizeof(struct timezone))) | ||
230 | break; | ||
231 | |||
185 | ret = sys_gettimeofday( | 232 | ret = sys_gettimeofday( |
186 | (struct timeval __user *)regs->di, | 233 | (struct timeval __user *)regs->di, |
187 | (struct timezone __user *)regs->si); | 234 | (struct timezone __user *)regs->si); |
188 | break; | 235 | break; |
189 | 236 | ||
190 | case 1: | 237 | case 1: |
238 | if (!write_ok_or_segv(regs->di, sizeof(time_t))) | ||
239 | break; | ||
240 | |||
191 | ret = sys_time((time_t __user *)regs->di); | 241 | ret = sys_time((time_t __user *)regs->di); |
192 | break; | 242 | break; |
193 | 243 | ||
194 | case 2: | 244 | case 2: |
245 | if (!write_ok_or_segv(regs->di, sizeof(unsigned)) || | ||
246 | !write_ok_or_segv(regs->si, sizeof(unsigned))) | ||
247 | break; | ||
248 | |||
195 | ret = sys_getcpu((unsigned __user *)regs->di, | 249 | ret = sys_getcpu((unsigned __user *)regs->di, |
196 | (unsigned __user *)regs->si, | 250 | (unsigned __user *)regs->si, |
197 | 0); | 251 | 0); |
198 | break; | 252 | break; |
199 | } | 253 | } |
200 | 254 | ||
255 | current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error; | ||
256 | |||
201 | if (ret == -EFAULT) { | 257 | if (ret == -EFAULT) { |
202 | /* | 258 | /* Bad news -- userspace fed a bad pointer to a vsyscall. */ |
203 | * Bad news -- userspace fed a bad pointer to a vsyscall. | ||
204 | * | ||
205 | * With a real vsyscall, that would have caused SIGSEGV. | ||
206 | * To make writing reliable exploits using the emulated | ||
207 | * vsyscalls harder, generate SIGSEGV here as well. | ||
208 | */ | ||
209 | warn_bad_vsyscall(KERN_INFO, regs, | 259 | warn_bad_vsyscall(KERN_INFO, regs, |
210 | "vsyscall fault (exploit attempt?)"); | 260 | "vsyscall fault (exploit attempt?)"); |
211 | goto sigsegv; | 261 | |
262 | /* | ||
263 | * If we failed to generate a signal for any reason, | ||
264 | * generate one here. (This should be impossible.) | ||
265 | */ | ||
266 | if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) && | ||
267 | !sigismember(&tsk->pending.signal, SIGSEGV))) | ||
268 | goto sigsegv; | ||
269 | |||
270 | return true; /* Don't emulate the ret. */ | ||
212 | } | 271 | } |
213 | 272 | ||
214 | regs->ax = ret; | 273 | regs->ax = ret; |