author		David S. Miller <davem@davemloft.net>	2014-11-29 23:47:48 -0500
committer	David S. Miller <davem@davemloft.net>	2014-11-29 23:47:48 -0500
commit		60b7379dc5b1743427b031cca53e30860a38ada6 (patch)
tree		c0462b8dd188861bd04f36dd31672b7446e35dd8 /arch/x86
parent		a523a5ecc8c6ddceb8f783f600605553e5ad8963 (diff)
parent		7a5a4f978750756755dc839014e13d1b088ccc8e (diff)

Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Diffstat (limited to 'arch/x86')
-rw-r--r--	arch/x86/include/asm/page_32_types.h	 1
-rw-r--r--	arch/x86/include/asm/page_64_types.h	11
-rw-r--r--	arch/x86/include/asm/thread_info.h	 2
-rw-r--r--	arch/x86/include/asm/traps.h		 1
-rw-r--r--	arch/x86/kernel/dumpstack_64.c		 1
-rw-r--r--	arch/x86/kernel/entry_64.S		81
-rw-r--r--	arch/x86/kernel/traps.c			71
-rw-r--r--	arch/x86/kvm/mmu.c			 6
8 files changed, 86 insertions(+), 88 deletions(-)
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index f48b17df4224..3a52ee0e726d 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -20,7 +20,6 @@
 #define THREAD_SIZE_ORDER	1
 #define THREAD_SIZE		(PAGE_SIZE << THREAD_SIZE_ORDER)
 
-#define STACKFAULT_STACK 0
 #define DOUBLEFAULT_STACK 1
 #define NMI_STACK 0
 #define DEBUG_STACK 0
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 678205195ae1..75450b2c7be4 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -14,12 +14,11 @@
 #define IRQ_STACK_ORDER 2
 #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
 
-#define STACKFAULT_STACK 1
-#define DOUBLEFAULT_STACK 2
-#define NMI_STACK 3
-#define DEBUG_STACK 4
-#define MCE_STACK 5
-#define N_EXCEPTION_STACKS 5  /* hw limit: 7 */
+#define DOUBLEFAULT_STACK 1
+#define NMI_STACK 2
+#define DEBUG_STACK 3
+#define MCE_STACK 4
+#define N_EXCEPTION_STACKS 4  /* hw limit: 7 */
 
 #define PUD_PAGE_SIZE		(_AC(1, UL) << PUD_SHIFT)
 #define PUD_PAGE_MASK		(~(PUD_PAGE_SIZE-1))
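For orientation: each *_STACK constant above is a 1-based index into the 7-entry
Interrupt Stack Table in the TSS, and N_EXCEPTION_STACKS bounds how many per-CPU
exception stacks get allocated. The 3.18-era code consumes them roughly as below
(a paraphrased sketch, not part of this diff; variable names approximate
cpu_init() in arch/x86/kernel/cpu/common.c):

	/* cpu_init() fills one TSS IST slot per exception stack: */
	for (v = 0; v < N_EXCEPTION_STACKS; v++) {
		estacks += exception_stack_sizes[v];
		tss->x86_tss.ist[v] = (unsigned long)estacks;
	}

	/* trap_init() then routes a vector onto a slot, e.g.: */
	set_intr_gate_ist(X86_TRAP_DF, &double_fault, DOUBLEFAULT_STACK);
	set_intr_gate_ist(X86_TRAP_NMI, &nmi, NMI_STACK);

Dropping STACKFAULT_STACK lets the remaining slots renumber densely, which is
why N_EXCEPTION_STACKS falls from 5 to 4.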
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 854053889d4d..547e344a6dc6 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -141,7 +141,7 @@ struct thread_info {
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK						\
 	(_TIF_SIGPENDING | _TIF_MCE_NOTIFY | _TIF_NOTIFY_RESUME |	\
-	 _TIF_USER_RETURN_NOTIFY)
+	 _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
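Why this one-flag change matters: the exit-to-user slow path only acts on flags
that appear in _TIF_DO_NOTIFY_MASK. do_notify_resume() in
arch/x86/kernel/signal.c consumes the flag like this (a sketch of the era's
code):

	if (thread_info_flags & _TIF_UPROBE)
		uprobe_notify_resume(regs);

Without _TIF_UPROBE in the mask, a non-paranoid return to user mode could leave
a pending uprobe notification unserviced.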
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index bc8352e7010a..707adc6549d8 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -39,6 +39,7 @@ asmlinkage void simd_coprocessor_error(void);
 
 #ifdef CONFIG_TRACING
 asmlinkage void trace_page_fault(void);
+#define trace_stack_segment stack_segment
 #define trace_divide_error divide_error
 #define trace_bounds bounds
 #define trace_invalid_op invalid_op
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 1abcb50b48ae..ff86f19b5758 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -24,7 +24,6 @@ static char x86_stack_ids[][8] = {
 		[ DEBUG_STACK-1			]	= "#DB",
 		[ NMI_STACK-1			]	= "NMI",
 		[ DOUBLEFAULT_STACK-1		]	= "#DF",
-		[ STACKFAULT_STACK-1		]	= "#SS",
 		[ MCE_STACK-1			]	= "#MC",
 #if DEBUG_STKSZ > EXCEPTION_STKSZ
 		[ N_EXCEPTION_STACKS ...
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index df088bb03fb3..c0226ab54106 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -828,9 +828,15 @@ ENTRY(native_iret)
 	jnz native_irq_return_ldt
 #endif
 
+.global native_irq_return_iret
 native_irq_return_iret:
+	/*
+	 * This may fault.  Non-paranoid faults on return to userspace are
+	 * handled by fixup_bad_iret.  These include #SS, #GP, and #NP.
+	 * Double-faults due to espfix64 are handled in do_double_fault.
+	 * Other faults here are fatal.
+	 */
 	iretq
-	_ASM_EXTABLE(native_irq_return_iret, bad_iret)
 
 #ifdef CONFIG_X86_ESPFIX64
 native_irq_return_ldt:
@@ -858,25 +864,6 @@ native_irq_return_ldt:
 	jmp native_irq_return_iret
 #endif
 
-	.section .fixup,"ax"
-bad_iret:
-	/*
-	 * The iret traps when the %cs or %ss being restored is bogus.
-	 * We've lost the original trap vector and error code.
-	 * #GPF is the most likely one to get for an invalid selector.
-	 * So pretend we completed the iret and took the #GPF in user mode.
-	 *
-	 * We are now running with the kernel GS after exception recovery.
-	 * But error_entry expects us to have user GS to match the user %cs,
-	 * so swap back.
-	 */
-	pushq $0
-
-	SWAPGS
-	jmp general_protection
-
-	.previous
-
 	/* edi: workmask, edx: work */
 retint_careful:
 	CFI_RESTORE_STATE
@@ -922,37 +909,6 @@ ENTRY(retint_kernel)
 	CFI_ENDPROC
 END(common_interrupt)
 
-	/*
-	 * If IRET takes a fault on the espfix stack, then we
-	 * end up promoting it to a doublefault.  In that case,
-	 * modify the stack to make it look like we just entered
-	 * the #GP handler from user space, similar to bad_iret.
-	 */
-#ifdef CONFIG_X86_ESPFIX64
-	ALIGN
-__do_double_fault:
-	XCPT_FRAME 1 RDI+8
-	movq RSP(%rdi),%rax		/* Trap on the espfix stack? */
-	sarq $PGDIR_SHIFT,%rax
-	cmpl $ESPFIX_PGD_ENTRY,%eax
-	jne do_double_fault		/* No, just deliver the fault */
-	cmpl $__KERNEL_CS,CS(%rdi)
-	jne do_double_fault
-	movq RIP(%rdi),%rax
-	cmpq $native_irq_return_iret,%rax
-	jne do_double_fault		/* This shouldn't happen... */
-	movq PER_CPU_VAR(kernel_stack),%rax
-	subq $(6*8-KERNEL_STACK_OFFSET),%rax	/* Reset to original stack */
-	movq %rax,RSP(%rdi)
-	movq $0,(%rax)			/* Missing (lost) #GP error code */
-	movq $general_protection,RIP(%rdi)
-	retq
-	CFI_ENDPROC
-END(__do_double_fault)
-#else
-# define __do_double_fault do_double_fault
-#endif
-
 /*
  * APIC interrupts.
  */
@@ -1124,7 +1080,7 @@ idtentry overflow do_overflow has_error_code=0
 idtentry bounds do_bounds has_error_code=0
 idtentry invalid_op do_invalid_op has_error_code=0
 idtentry device_not_available do_device_not_available has_error_code=0
-idtentry double_fault __do_double_fault has_error_code=1 paranoid=1
+idtentry double_fault do_double_fault has_error_code=1 paranoid=1
 idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
 idtentry invalid_TSS do_invalid_TSS has_error_code=1
 idtentry segment_not_present do_segment_not_present has_error_code=1
@@ -1289,7 +1245,7 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
 
 idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
 idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
-idtentry stack_segment do_stack_segment has_error_code=1 paranoid=1
+idtentry stack_segment do_stack_segment has_error_code=1
 #ifdef CONFIG_XEN
 idtentry xen_debug do_debug has_error_code=0
 idtentry xen_int3 do_int3 has_error_code=0
@@ -1399,17 +1355,16 @@ error_sti:
 
 /*
  * There are two places in the kernel that can potentially fault with
- * usergs. Handle them here. The exception handlers after iret run with
- * kernel gs again, so don't set the user space flag. B stepping K8s
- * sometimes report an truncated RIP for IRET exceptions returning to
- * compat mode. Check for these here too.
+ * usergs. Handle them here.  B stepping K8s sometimes report a
+ * truncated RIP for IRET exceptions returning to compat mode. Check
+ * for these here too.
  */
 error_kernelspace:
 	CFI_REL_OFFSET rcx, RCX+8
 	incl %ebx
 	leaq native_irq_return_iret(%rip),%rcx
 	cmpq %rcx,RIP+8(%rsp)
-	je error_swapgs
+	je error_bad_iret
 	movl %ecx,%eax			/* zero extend */
 	cmpq %rax,RIP+8(%rsp)
 	je bstep_iret
@@ -1420,7 +1375,15 @@ error_kernelspace:
 bstep_iret:
 	/* Fix truncated RIP */
 	movq %rcx,RIP+8(%rsp)
-	jmp error_swapgs
+	/* fall through */
+
+error_bad_iret:
+	SWAPGS
+	mov %rsp,%rdi
+	call fixup_bad_iret
+	mov %rax,%rsp
+	decl %ebx			/* Return to usergs */
+	jmp error_sti
 	CFI_ENDPROC
 END(error_entry)
 
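Both the __do_double_fault asm deleted above and its C replacement in traps.c
(below) rely on the same trick: any address on the espfix64 stack, arithmetically
shifted right by PGDIR_SHIFT, lands exactly on the espfix PGD slot. A standalone
illustration of the arithmetic (the constants are copied here for demonstration
and assumed to match the 3.18-era page_64_types.h; verify against the tree):

	#include <stdio.h>

	#define PGDIR_SHIFT		39
	#define ESPFIX_PGD_ENTRY	(-2L)
	#define ESPFIX_BASE_ADDR	((unsigned long)ESPFIX_PGD_ENTRY << PGDIR_SHIFT)

	int main(void)
	{
		/* A hypothetical %rsp somewhere inside the espfix region. */
		long sp = (long)(ESPFIX_BASE_ADDR + 0x1f58);

		/* Same test as `sarq $PGDIR_SHIFT' above and the
		 * (long)regs->sp >> PGDIR_SHIFT check in do_double_fault: */
		printf("%d\n", (sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY); /* prints 1 */
		return 0;
	}

The sign-preserving arithmetic shift is what makes the comparison against a
negative PGD entry number work.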
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 0d0e922fafc1..de801f22128a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -233,32 +233,40 @@ DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op)
 DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
 DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS)
 DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present)
-#ifdef CONFIG_X86_32
 DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment)
-#endif
 DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check)
 
 #ifdef CONFIG_X86_64
 /* Runs on IST stack */
-dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
-{
-	enum ctx_state prev_state;
-
-	prev_state = exception_enter();
-	if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
-		       X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) {
-		preempt_conditional_sti(regs);
-		do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
-		preempt_conditional_cli(regs);
-	}
-	exception_exit(prev_state);
-}
-
 dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 {
 	static const char str[] = "double fault";
 	struct task_struct *tsk = current;
 
+#ifdef CONFIG_X86_ESPFIX64
+	extern unsigned char native_irq_return_iret[];
+
+	/*
+	 * If IRET takes a non-IST fault on the espfix64 stack, then we
+	 * end up promoting it to a doublefault.  In that case, modify
+	 * the stack to make it look like we just entered the #GP
+	 * handler from user space, similar to bad_iret.
+	 */
+	if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY &&
+		regs->cs == __KERNEL_CS &&
+		regs->ip == (unsigned long)native_irq_return_iret)
+	{
+		struct pt_regs *normal_regs = task_pt_regs(current);
+
+		/* Fake a #GP(0) from userspace. */
+		memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
+		normal_regs->orig_ax = 0;  /* Missing (lost) #GP error code */
+		regs->ip = (unsigned long)general_protection;
+		regs->sp = (unsigned long)&normal_regs->orig_ax;
+		return;
+	}
+#endif
+
 	exception_enter();
 	/* Return not checked because double check cannot be ignored */
 	notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
@@ -399,6 +407,35 @@ asmlinkage __visible struct pt_regs *sync_regs(struct pt_regs *eregs)
 	return regs;
 }
 NOKPROBE_SYMBOL(sync_regs);
+
+struct bad_iret_stack {
+	void *error_entry_ret;
+	struct pt_regs regs;
+};
+
+asmlinkage __visible
+struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
+{
+	/*
+	 * This is called from entry_64.S early in handling a fault
+	 * caused by a bad iret to user mode.  To handle the fault
+	 * correctly, we want to move our stack frame to task_pt_regs
+	 * and we want to pretend that the exception came from the
+	 * iret target.
+	 */
+	struct bad_iret_stack *new_stack =
+		container_of(task_pt_regs(current),
+			     struct bad_iret_stack, regs);
+
+	/* Copy the IRET target to the new stack. */
+	memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
+
+	/* Copy the remainder of the stack from the current stack. */
+	memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
+
+	BUG_ON(!user_mode_vm(&new_stack->regs));
+	return new_stack;
+}
 #endif
 
 /*
@@ -778,7 +815,7 @@ void __init trap_init(void)
 	set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun);
 	set_intr_gate(X86_TRAP_TS, invalid_TSS);
 	set_intr_gate(X86_TRAP_NP, segment_not_present);
-	set_intr_gate_ist(X86_TRAP_SS, &stack_segment, STACKFAULT_STACK);
+	set_intr_gate(X86_TRAP_SS, stack_segment);
 	set_intr_gate(X86_TRAP_GP, general_protection);
 	set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug);
 	set_intr_gate(X86_TRAP_MF, coprocessor_error);
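A note on the 5*8 in the two memmove() calls above: it is the size of the
hardware iret frame, the five quadwords the CPU pushes on a 64-bit trap, which
also form the tail of struct pt_regs. Schematically (an illustrative struct,
not a kernel type):

	struct iret_frame {		/* what IRETQ pops, lowest address first */
		unsigned long ip;	/* RIP */
		unsigned long cs;
		unsigned long flags;	/* RFLAGS */
		unsigned long sp;	/* RSP */
		unsigned long ss;
	};				/* sizeof == 5*8 */

fixup_bad_iret() copies these five words from the failed iret's target frame
onto the tail of task_pt_regs(), then copies everything below regs.ip from the
current exception frame, so the #GP handler runs on the normal kernel stack as
though the trap had arrived directly from user mode.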
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ac1c4de3a484..978f402006ee 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -630,7 +630,7 @@ static int mmu_spte_clear_track_bits(u64 *sptep)
 	 * kvm mmu, before reclaiming the page, we should
 	 * unmap it from mmu first.
 	 */
-	WARN_ON(!kvm_is_mmio_pfn(pfn) && !page_count(pfn_to_page(pfn)));
+	WARN_ON(!kvm_is_reserved_pfn(pfn) && !page_count(pfn_to_page(pfn)));
 
 	if (!shadow_accessed_mask || old_spte & shadow_accessed_mask)
 		kvm_set_pfn_accessed(pfn);
@@ -2461,7 +2461,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		spte |= PT_PAGE_SIZE_MASK;
 	if (tdp_enabled)
 		spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
-			kvm_is_mmio_pfn(pfn));
+			kvm_is_reserved_pfn(pfn));
 
 	if (host_writable)
 		spte |= SPTE_HOST_WRITEABLE;
@@ -2737,7 +2737,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
 	 * PT_PAGE_TABLE_LEVEL and there would be no adjustment done
 	 * here.
 	 */
-	if (!is_error_noslot_pfn(pfn) && !kvm_is_mmio_pfn(pfn) &&
+	if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) &&
 	    level == PT_PAGE_TABLE_LEVEL &&
 	    PageTransCompound(pfn_to_page(pfn)) &&
 	    !has_wrprotected_page(vcpu->kvm, gfn, PT_DIRECTORY_LEVEL)) {
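The mmu.c hunks are the KVM side of the kvm_is_mmio_pfn() ->
kvm_is_reserved_pfn() rename merged here. For reference, the renamed helper in
virt/kvm/kvm_main.c reads roughly as below (reproduced from the corresponding
commit from memory; verify against the tree):

	bool kvm_is_reserved_pfn(pfn_t pfn)
	{
		if (pfn_valid(pfn))
			return PageReserved(pfn_to_page(pfn));

		return true;
	}

The new name matches what the predicate actually tests -- reserved pages in
general, not just MMIO -- which is what the WARN_ON and refcounting decisions
in the hunks above care about.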