aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexander van Heukelum <heukelum@fastmail.fm>2009-06-17 18:35:57 -0400
committerH. Peter Anvin <hpa@zytor.com>2009-06-18 00:35:09 -0400
commit2e04bc76560decd9270be2a805927316f617ef56 (patch)
tree8cda321314bf75690921f66394230a85e6ec4b65
parent3f4c3955ea320bde870ac2ce587466295aba5710 (diff)
i386: fix return to 16-bit stack from NMI handler
Returning to a task with a 16-bit stack requires special care: the iret instruction does not restore the high word of esp in that case. The espfix code fixes this, but currently is not invoked on NMIs. This means that a running task gets the upper word of esp clobbered due to intervening NMIs. To reproduce, compile and run the following program with the nmi watchdog enabled (nmi_watchdog=2 on the command line). Using gdb you can see that the high bits of esp contain garbage, while the low bits are still correct. This patch puts the espfix code back into the NMI code path. The patch is slightly complicated due to the irqtrace infrastructure not being NMI-safe. The NMI return path cannot call TRACE_IRQS_IRET. Otherwise, the tail of the normal iret-code is correct for the nmi code path too. To be able to share this code-path, the TRACE_IRQS_IRET was moved up a bit. The espfix code exists after the TRACE_IRQS_IRET, but this code explicitly disables interrupts. This short interrupts-off section is now not traced anymore. The return-to-kernel path now always includes the preliminary test to decide if the espfix code should be called. This is never the case, but doing it this way keeps the patch as simple as possible and the few extra instructions should not affect timing in any significant way. #define _GNU_SOURCE #include <stdio.h> #include <sys/types.h> #include <sys/mman.h> #include <unistd.h> #include <sys/syscall.h> #include <asm/ldt.h> int modify_ldt(int func, void *ptr, unsigned long bytecount) { return syscall(SYS_modify_ldt, func, ptr, bytecount); } /* this is assumed to be usable */ #define SEGBASEADDR 0x10000 #define SEGLIMIT 0x20000 /* 16-bit segment */ struct user_desc desc = { .entry_number = 0, .base_addr = SEGBASEADDR, .limit = SEGLIMIT, .seg_32bit = 0, .contents = 0, /* ??? 
*/ .read_exec_only = 0, .limit_in_pages = 0, .seg_not_present = 0, .useable = 1 }; int main(void) { setvbuf(stdout, NULL, _IONBF, 0); /* map a 64 kb segment */ char *pointer = mmap((void *)SEGBASEADDR, SEGLIMIT+1, PROT_EXEC|PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0); if (pointer == NULL) { printf("could not map space\n"); return 0; } /* write ldt, new mode */ int err = modify_ldt(0x11, &desc, sizeof(desc)); if (err) { printf("error modifying ldt: %i\n", err); return 0; } for (int i=0; i<1000; i++) { asm volatile ( "pusha\n\t" "mov %ss, %eax\n\t" /* preserve ss:esp */ "mov %esp, %ebp\n\t" "push $7\n\t" /* index 0, ldt, user mode */ "push $65536-4096\n\t" /* esp */ "lss (%esp), %esp\n\t" /* switch to new stack */ "push %eax\n\t" /* save old ss:esp on new stack */ "push %ebp\n\t" "add $17*65536, %esp\n\t" /* set high bits */ "mov %esp, %edx\n\t" "mov $10000000, %ecx\n\t" /* wait... */ "1: loop 1b\n\t" /* ... a bit */ "cmp %esp, %edx\n\t" "je 1f\n\t" "ud2\n\t" /* esp changed inexplicably! */ "1:\n\t" "sub $17*65536, %esp\n\t" /* restore high bits */ "lss (%esp), %esp\n\t" /* restore old ss:esp */ "popa\n\t"); printf("\rx%ix", i); } return 0; } Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm> Acked-by: Stas Sergeev <stsp@aknet.ru> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
-rw-r--r--arch/x86/kernel/entry_32.S14
1 files changed, 8 insertions, 6 deletions
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c929add475c9..d7d1c7d20e4e 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -84,7 +84,7 @@
84#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF 84#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
85#else 85#else
86#define preempt_stop(clobbers) 86#define preempt_stop(clobbers)
87#define resume_kernel restore_nocheck 87#define resume_kernel restore_all
88#endif 88#endif
89 89
90.macro TRACE_IRQS_IRET 90.macro TRACE_IRQS_IRET
@@ -372,7 +372,7 @@ END(ret_from_exception)
372ENTRY(resume_kernel) 372ENTRY(resume_kernel)
373 DISABLE_INTERRUPTS(CLBR_ANY) 373 DISABLE_INTERRUPTS(CLBR_ANY)
374 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? 374 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
375 jnz restore_nocheck 375 jnz restore_all
376need_resched: 376need_resched:
377 movl TI_flags(%ebp), %ecx # need_resched set ? 377 movl TI_flags(%ebp), %ecx # need_resched set ?
378 testb $_TIF_NEED_RESCHED, %cl 378 testb $_TIF_NEED_RESCHED, %cl
@@ -540,6 +540,8 @@ syscall_exit:
540 jne syscall_exit_work 540 jne syscall_exit_work
541 541
542restore_all: 542restore_all:
543 TRACE_IRQS_IRET
544restore_all_notrace:
543 movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS 545 movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
544 # Warning: PT_OLDSS(%esp) contains the wrong/random values if we 546 # Warning: PT_OLDSS(%esp) contains the wrong/random values if we
545 # are returning to the kernel. 547 # are returning to the kernel.
@@ -551,8 +553,6 @@ restore_all:
551 CFI_REMEMBER_STATE 553 CFI_REMEMBER_STATE
552 je ldt_ss # returning to user-space with LDT SS 554 je ldt_ss # returning to user-space with LDT SS
553restore_nocheck: 555restore_nocheck:
554 TRACE_IRQS_IRET
555restore_nocheck_notrace:
556 RESTORE_REGS 4 # skip orig_eax/error_code 556 RESTORE_REGS 4 # skip orig_eax/error_code
557 CFI_ADJUST_CFA_OFFSET -4 557 CFI_ADJUST_CFA_OFFSET -4
558irq_return: 558irq_return:
@@ -601,8 +601,10 @@ ldt_ss:
601 CFI_ADJUST_CFA_OFFSET 4 601 CFI_ADJUST_CFA_OFFSET 4
602 pushl %eax 602 pushl %eax
603 CFI_ADJUST_CFA_OFFSET 4 603 CFI_ADJUST_CFA_OFFSET 4
604 /* Disable interrupts, but do not irqtrace this section: we
605 * will soon execute iret and the tracer was already set to
606 * the irqstate after the iret */
604 DISABLE_INTERRUPTS(CLBR_EAX) 607 DISABLE_INTERRUPTS(CLBR_EAX)
605 TRACE_IRQS_OFF
606 lss (%esp), %esp 608 lss (%esp), %esp
607 CFI_ADJUST_CFA_OFFSET -8 609 CFI_ADJUST_CFA_OFFSET -8
608 jmp restore_nocheck 610 jmp restore_nocheck
@@ -1329,7 +1331,7 @@ nmi_stack_correct:
1329 xorl %edx,%edx # zero error code 1331 xorl %edx,%edx # zero error code
1330 movl %esp,%eax # pt_regs pointer 1332 movl %esp,%eax # pt_regs pointer
1331 call do_nmi 1333 call do_nmi
1332 jmp restore_nocheck_notrace 1334 jmp restore_all_notrace
1333 CFI_ENDPROC 1335 CFI_ENDPROC
1334 1336
1335nmi_stack_fixup: 1337nmi_stack_fixup: