diff options
| -rw-r--r-- | arch/x86/include/asm/page_32_types.h | 1 | ||||
| -rw-r--r-- | arch/x86/include/asm/page_64_types.h | 11 | ||||
| -rw-r--r-- | arch/x86/include/asm/traps.h | 1 | ||||
| -rw-r--r-- | arch/x86/kernel/dumpstack_64.c | 1 | ||||
| -rw-r--r-- | arch/x86/kernel/entry_64.S | 81 | ||||
| -rw-r--r-- | arch/x86/kernel/traps.c | 71 |
6 files changed, 82 insertions, 84 deletions
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index f48b17df4224..3a52ee0e726d 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h | |||
| @@ -20,7 +20,6 @@ | |||
| 20 | #define THREAD_SIZE_ORDER 1 | 20 | #define THREAD_SIZE_ORDER 1 |
| 21 | #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) | 21 | #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) |
| 22 | 22 | ||
| 23 | #define STACKFAULT_STACK 0 | ||
| 24 | #define DOUBLEFAULT_STACK 1 | 23 | #define DOUBLEFAULT_STACK 1 |
| 25 | #define NMI_STACK 0 | 24 | #define NMI_STACK 0 |
| 26 | #define DEBUG_STACK 0 | 25 | #define DEBUG_STACK 0 |
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 678205195ae1..75450b2c7be4 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h | |||
| @@ -14,12 +14,11 @@ | |||
| 14 | #define IRQ_STACK_ORDER 2 | 14 | #define IRQ_STACK_ORDER 2 |
| 15 | #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) | 15 | #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) |
| 16 | 16 | ||
| 17 | #define STACKFAULT_STACK 1 | 17 | #define DOUBLEFAULT_STACK 1 |
| 18 | #define DOUBLEFAULT_STACK 2 | 18 | #define NMI_STACK 2 |
| 19 | #define NMI_STACK 3 | 19 | #define DEBUG_STACK 3 |
| 20 | #define DEBUG_STACK 4 | 20 | #define MCE_STACK 4 |
| 21 | #define MCE_STACK 5 | 21 | #define N_EXCEPTION_STACKS 4 /* hw limit: 7 */ |
| 22 | #define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ | ||
| 23 | 22 | ||
| 24 | #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) | 23 | #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) |
| 25 | #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) | 24 | #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) |
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index bc8352e7010a..707adc6549d8 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h | |||
| @@ -39,6 +39,7 @@ asmlinkage void simd_coprocessor_error(void); | |||
| 39 | 39 | ||
| 40 | #ifdef CONFIG_TRACING | 40 | #ifdef CONFIG_TRACING |
| 41 | asmlinkage void trace_page_fault(void); | 41 | asmlinkage void trace_page_fault(void); |
| 42 | #define trace_stack_segment stack_segment | ||
| 42 | #define trace_divide_error divide_error | 43 | #define trace_divide_error divide_error |
| 43 | #define trace_bounds bounds | 44 | #define trace_bounds bounds |
| 44 | #define trace_invalid_op invalid_op | 45 | #define trace_invalid_op invalid_op |
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 1abcb50b48ae..ff86f19b5758 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
| @@ -24,7 +24,6 @@ static char x86_stack_ids[][8] = { | |||
| 24 | [ DEBUG_STACK-1 ] = "#DB", | 24 | [ DEBUG_STACK-1 ] = "#DB", |
| 25 | [ NMI_STACK-1 ] = "NMI", | 25 | [ NMI_STACK-1 ] = "NMI", |
| 26 | [ DOUBLEFAULT_STACK-1 ] = "#DF", | 26 | [ DOUBLEFAULT_STACK-1 ] = "#DF", |
| 27 | [ STACKFAULT_STACK-1 ] = "#SS", | ||
| 28 | [ MCE_STACK-1 ] = "#MC", | 27 | [ MCE_STACK-1 ] = "#MC", |
| 29 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | 28 | #if DEBUG_STKSZ > EXCEPTION_STKSZ |
| 30 | [ N_EXCEPTION_STACKS ... | 29 | [ N_EXCEPTION_STACKS ... |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index df088bb03fb3..c0226ab54106 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
| @@ -828,9 +828,15 @@ ENTRY(native_iret) | |||
| 828 | jnz native_irq_return_ldt | 828 | jnz native_irq_return_ldt |
| 829 | #endif | 829 | #endif |
| 830 | 830 | ||
| 831 | .global native_irq_return_iret | ||
| 831 | native_irq_return_iret: | 832 | native_irq_return_iret: |
| 833 | /* | ||
| 834 | * This may fault. Non-paranoid faults on return to userspace are | ||
| 835 | * handled by fixup_bad_iret. These include #SS, #GP, and #NP. | ||
| 836 | * Double-faults due to espfix64 are handled in do_double_fault. | ||
| 837 | * Other faults here are fatal. | ||
| 838 | */ | ||
| 832 | iretq | 839 | iretq |
| 833 | _ASM_EXTABLE(native_irq_return_iret, bad_iret) | ||
| 834 | 840 | ||
| 835 | #ifdef CONFIG_X86_ESPFIX64 | 841 | #ifdef CONFIG_X86_ESPFIX64 |
| 836 | native_irq_return_ldt: | 842 | native_irq_return_ldt: |
| @@ -858,25 +864,6 @@ native_irq_return_ldt: | |||
| 858 | jmp native_irq_return_iret | 864 | jmp native_irq_return_iret |
| 859 | #endif | 865 | #endif |
| 860 | 866 | ||
| 861 | .section .fixup,"ax" | ||
| 862 | bad_iret: | ||
| 863 | /* | ||
| 864 | * The iret traps when the %cs or %ss being restored is bogus. | ||
| 865 | * We've lost the original trap vector and error code. | ||
| 866 | * #GPF is the most likely one to get for an invalid selector. | ||
| 867 | * So pretend we completed the iret and took the #GPF in user mode. | ||
| 868 | * | ||
| 869 | * We are now running with the kernel GS after exception recovery. | ||
| 870 | * But error_entry expects us to have user GS to match the user %cs, | ||
| 871 | * so swap back. | ||
| 872 | */ | ||
| 873 | pushq $0 | ||
| 874 | |||
| 875 | SWAPGS | ||
| 876 | jmp general_protection | ||
| 877 | |||
| 878 | .previous | ||
| 879 | |||
| 880 | /* edi: workmask, edx: work */ | 867 | /* edi: workmask, edx: work */ |
| 881 | retint_careful: | 868 | retint_careful: |
| 882 | CFI_RESTORE_STATE | 869 | CFI_RESTORE_STATE |
| @@ -922,37 +909,6 @@ ENTRY(retint_kernel) | |||
| 922 | CFI_ENDPROC | 909 | CFI_ENDPROC |
| 923 | END(common_interrupt) | 910 | END(common_interrupt) |
| 924 | 911 | ||
| 925 | /* | ||
| 926 | * If IRET takes a fault on the espfix stack, then we | ||
| 927 | * end up promoting it to a doublefault. In that case, | ||
| 928 | * modify the stack to make it look like we just entered | ||
| 929 | * the #GP handler from user space, similar to bad_iret. | ||
| 930 | */ | ||
| 931 | #ifdef CONFIG_X86_ESPFIX64 | ||
| 932 | ALIGN | ||
| 933 | __do_double_fault: | ||
| 934 | XCPT_FRAME 1 RDI+8 | ||
| 935 | movq RSP(%rdi),%rax /* Trap on the espfix stack? */ | ||
| 936 | sarq $PGDIR_SHIFT,%rax | ||
| 937 | cmpl $ESPFIX_PGD_ENTRY,%eax | ||
| 938 | jne do_double_fault /* No, just deliver the fault */ | ||
| 939 | cmpl $__KERNEL_CS,CS(%rdi) | ||
| 940 | jne do_double_fault | ||
| 941 | movq RIP(%rdi),%rax | ||
| 942 | cmpq $native_irq_return_iret,%rax | ||
| 943 | jne do_double_fault /* This shouldn't happen... */ | ||
| 944 | movq PER_CPU_VAR(kernel_stack),%rax | ||
| 945 | subq $(6*8-KERNEL_STACK_OFFSET),%rax /* Reset to original stack */ | ||
| 946 | movq %rax,RSP(%rdi) | ||
| 947 | movq $0,(%rax) /* Missing (lost) #GP error code */ | ||
| 948 | movq $general_protection,RIP(%rdi) | ||
| 949 | retq | ||
| 950 | CFI_ENDPROC | ||
| 951 | END(__do_double_fault) | ||
| 952 | #else | ||
| 953 | # define __do_double_fault do_double_fault | ||
| 954 | #endif | ||
| 955 | |||
| 956 | /* | 912 | /* |
| 957 | * APIC interrupts. | 913 | * APIC interrupts. |
| 958 | */ | 914 | */ |
| @@ -1124,7 +1080,7 @@ idtentry overflow do_overflow has_error_code=0 | |||
| 1124 | idtentry bounds do_bounds has_error_code=0 | 1080 | idtentry bounds do_bounds has_error_code=0 |
| 1125 | idtentry invalid_op do_invalid_op has_error_code=0 | 1081 | idtentry invalid_op do_invalid_op has_error_code=0 |
| 1126 | idtentry device_not_available do_device_not_available has_error_code=0 | 1082 | idtentry device_not_available do_device_not_available has_error_code=0 |
| 1127 | idtentry double_fault __do_double_fault has_error_code=1 paranoid=1 | 1083 | idtentry double_fault do_double_fault has_error_code=1 paranoid=1 |
| 1128 | idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 | 1084 | idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0 |
| 1129 | idtentry invalid_TSS do_invalid_TSS has_error_code=1 | 1085 | idtentry invalid_TSS do_invalid_TSS has_error_code=1 |
| 1130 | idtentry segment_not_present do_segment_not_present has_error_code=1 | 1086 | idtentry segment_not_present do_segment_not_present has_error_code=1 |
| @@ -1289,7 +1245,7 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ | |||
| 1289 | 1245 | ||
| 1290 | idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK | 1246 | idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK |
| 1291 | idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK | 1247 | idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK |
| 1292 | idtentry stack_segment do_stack_segment has_error_code=1 paranoid=1 | 1248 | idtentry stack_segment do_stack_segment has_error_code=1 |
| 1293 | #ifdef CONFIG_XEN | 1249 | #ifdef CONFIG_XEN |
| 1294 | idtentry xen_debug do_debug has_error_code=0 | 1250 | idtentry xen_debug do_debug has_error_code=0 |
| 1295 | idtentry xen_int3 do_int3 has_error_code=0 | 1251 | idtentry xen_int3 do_int3 has_error_code=0 |
| @@ -1399,17 +1355,16 @@ error_sti: | |||
| 1399 | 1355 | ||
| 1400 | /* | 1356 | /* |
| 1401 | * There are two places in the kernel that can potentially fault with | 1357 | * There are two places in the kernel that can potentially fault with |
| 1402 | * usergs. Handle them here. The exception handlers after iret run with | 1358 | * usergs. Handle them here. B stepping K8s sometimes report a |
| 1403 | * kernel gs again, so don't set the user space flag. B stepping K8s | 1359 | * truncated RIP for IRET exceptions returning to compat mode. Check |
| 1404 | * sometimes report an truncated RIP for IRET exceptions returning to | 1360 | * for these here too. |
| 1405 | * compat mode. Check for these here too. | ||
| 1406 | */ | 1361 | */ |
| 1407 | error_kernelspace: | 1362 | error_kernelspace: |
| 1408 | CFI_REL_OFFSET rcx, RCX+8 | 1363 | CFI_REL_OFFSET rcx, RCX+8 |
| 1409 | incl %ebx | 1364 | incl %ebx |
| 1410 | leaq native_irq_return_iret(%rip),%rcx | 1365 | leaq native_irq_return_iret(%rip),%rcx |
| 1411 | cmpq %rcx,RIP+8(%rsp) | 1366 | cmpq %rcx,RIP+8(%rsp) |
| 1412 | je error_swapgs | 1367 | je error_bad_iret |
| 1413 | movl %ecx,%eax /* zero extend */ | 1368 | movl %ecx,%eax /* zero extend */ |
| 1414 | cmpq %rax,RIP+8(%rsp) | 1369 | cmpq %rax,RIP+8(%rsp) |
| 1415 | je bstep_iret | 1370 | je bstep_iret |
| @@ -1420,7 +1375,15 @@ error_kernelspace: | |||
| 1420 | bstep_iret: | 1375 | bstep_iret: |
| 1421 | /* Fix truncated RIP */ | 1376 | /* Fix truncated RIP */ |
| 1422 | movq %rcx,RIP+8(%rsp) | 1377 | movq %rcx,RIP+8(%rsp) |
| 1423 | jmp error_swapgs | 1378 | /* fall through */ |
| 1379 | |||
| 1380 | error_bad_iret: | ||
| 1381 | SWAPGS | ||
| 1382 | mov %rsp,%rdi | ||
| 1383 | call fixup_bad_iret | ||
| 1384 | mov %rax,%rsp | ||
| 1385 | decl %ebx /* Return to usergs */ | ||
| 1386 | jmp error_sti | ||
| 1424 | CFI_ENDPROC | 1387 | CFI_ENDPROC |
| 1425 | END(error_entry) | 1388 | END(error_entry) |
| 1426 | 1389 | ||
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 0d0e922fafc1..de801f22128a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
| @@ -233,32 +233,40 @@ DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op) | |||
| 233 | DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun) | 233 | DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun) |
| 234 | DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) | 234 | DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) |
| 235 | DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) | 235 | DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) |
| 236 | #ifdef CONFIG_X86_32 | ||
| 237 | DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) | 236 | DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) |
| 238 | #endif | ||
| 239 | DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) | 237 | DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) |
| 240 | 238 | ||
| 241 | #ifdef CONFIG_X86_64 | 239 | #ifdef CONFIG_X86_64 |
| 242 | /* Runs on IST stack */ | 240 | /* Runs on IST stack */ |
| 243 | dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) | ||
| 244 | { | ||
| 245 | enum ctx_state prev_state; | ||
| 246 | |||
| 247 | prev_state = exception_enter(); | ||
| 248 | if (notify_die(DIE_TRAP, "stack segment", regs, error_code, | ||
| 249 | X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) { | ||
| 250 | preempt_conditional_sti(regs); | ||
| 251 | do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); | ||
| 252 | preempt_conditional_cli(regs); | ||
| 253 | } | ||
| 254 | exception_exit(prev_state); | ||
| 255 | } | ||
| 256 | |||
| 257 | dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | 241 | dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) |
| 258 | { | 242 | { |
| 259 | static const char str[] = "double fault"; | 243 | static const char str[] = "double fault"; |
| 260 | struct task_struct *tsk = current; | 244 | struct task_struct *tsk = current; |
| 261 | 245 | ||
| 246 | #ifdef CONFIG_X86_ESPFIX64 | ||
| 247 | extern unsigned char native_irq_return_iret[]; | ||
| 248 | |||
| 249 | /* | ||
| 250 | * If IRET takes a non-IST fault on the espfix64 stack, then we | ||
| 251 | * end up promoting it to a doublefault. In that case, modify | ||
| 252 | * the stack to make it look like we just entered the #GP | ||
| 253 | * handler from user space, similar to bad_iret. | ||
| 254 | */ | ||
| 255 | if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY && | ||
| 256 | regs->cs == __KERNEL_CS && | ||
| 257 | regs->ip == (unsigned long)native_irq_return_iret) | ||
| 258 | { | ||
| 259 | struct pt_regs *normal_regs = task_pt_regs(current); | ||
| 260 | |||
| 261 | /* Fake a #GP(0) from userspace. */ | ||
| 262 | memmove(&normal_regs->ip, (void *)regs->sp, 5*8); | ||
| 263 | normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */ | ||
| 264 | regs->ip = (unsigned long)general_protection; | ||
| 265 | regs->sp = (unsigned long)&normal_regs->orig_ax; | ||
| 266 | return; | ||
| 267 | } | ||
| 268 | #endif | ||
| 269 | |||
| 262 | exception_enter(); | 270 | exception_enter(); |
| 263 | /* Return not checked because double check cannot be ignored */ | 271 | /* Return not checked because double check cannot be ignored */ |
| 264 | notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); | 272 | notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); |
| @@ -399,6 +407,35 @@ asmlinkage __visible struct pt_regs *sync_regs(struct pt_regs *eregs) | |||
| 399 | return regs; | 407 | return regs; |
| 400 | } | 408 | } |
| 401 | NOKPROBE_SYMBOL(sync_regs); | 409 | NOKPROBE_SYMBOL(sync_regs); |
| 410 | |||
| 411 | struct bad_iret_stack { | ||
| 412 | void *error_entry_ret; | ||
| 413 | struct pt_regs regs; | ||
| 414 | }; | ||
| 415 | |||
| 416 | asmlinkage __visible | ||
| 417 | struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s) | ||
| 418 | { | ||
| 419 | /* | ||
| 420 | * This is called from entry_64.S early in handling a fault | ||
| 421 | * caused by a bad iret to user mode. To handle the fault | ||
| 422 | * correctly, we want to move our stack frame to task_pt_regs | ||
| 423 | * and we want to pretend that the exception came from the | ||
| 424 | * iret target. | ||
| 425 | */ | ||
| 426 | struct bad_iret_stack *new_stack = | ||
| 427 | container_of(task_pt_regs(current), | ||
| 428 | struct bad_iret_stack, regs); | ||
| 429 | |||
| 430 | /* Copy the IRET target to the new stack. */ | ||
| 431 | memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8); | ||
| 432 | |||
| 433 | /* Copy the remainder of the stack from the current stack. */ | ||
| 434 | memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip)); | ||
| 435 | |||
| 436 | BUG_ON(!user_mode_vm(&new_stack->regs)); | ||
| 437 | return new_stack; | ||
| 438 | } | ||
| 402 | #endif | 439 | #endif |
| 403 | 440 | ||
| 404 | /* | 441 | /* |
| @@ -778,7 +815,7 @@ void __init trap_init(void) | |||
| 778 | set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun); | 815 | set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun); |
| 779 | set_intr_gate(X86_TRAP_TS, invalid_TSS); | 816 | set_intr_gate(X86_TRAP_TS, invalid_TSS); |
| 780 | set_intr_gate(X86_TRAP_NP, segment_not_present); | 817 | set_intr_gate(X86_TRAP_NP, segment_not_present); |
| 781 | set_intr_gate_ist(X86_TRAP_SS, &stack_segment, STACKFAULT_STACK); | 818 | set_intr_gate(X86_TRAP_SS, stack_segment); |
| 782 | set_intr_gate(X86_TRAP_GP, general_protection); | 819 | set_intr_gate(X86_TRAP_GP, general_protection); |
| 783 | set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug); | 820 | set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug); |
| 784 | set_intr_gate(X86_TRAP_MF, coprocessor_error); | 821 | set_intr_gate(X86_TRAP_MF, coprocessor_error); |
