author      Ingo Molnar <mingo@elte.hu>    2012-02-28 04:26:07 -0500
committer   Ingo Molnar <mingo@elte.hu>    2012-02-28 04:27:36 -0500
commit      458ce2910aa83d8a2cafb489d727f7da839e73c6 (patch)
tree        28e088137eb068e5620f17eaf0ca19d1562006ed /arch/x86/kernel/entry_64.S
parent      69466466ce889cd2cbc8cda9ff1c6083f48cc7f9 (diff)
parent      586c6e7013c8cbb8c91aaa6568ec349b1dc2c691 (diff)

Merge branch 'linus' into x86/asm

Sync up the latest NMI fixes.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/entry_64.S')
-rw-r--r--   arch/x86/kernel/entry_64.S | 239
1 file changed, 199 insertions(+), 40 deletions(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 211b2e1683f1..94c636aa7e1d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -55,6 +55,7 @@
 #include <asm/paravirt.h>
 #include <asm/ftrace.h>
 #include <asm/percpu.h>
+#include <linux/err.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
 #include <linux/elf-em.h>
@@ -547,7 +548,7 @@ badsys:
 #ifdef CONFIG_AUDITSYSCALL
 	/*
 	 * Fast path for syscall audit without full syscall trace.
-	 * We just call audit_syscall_entry() directly, and then
+	 * We just call __audit_syscall_entry() directly, and then
 	 * jump back to the normal fast path.
 	 */
 auditsys:
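
The double-underscore names come from the audit interface rework this merge
brings in: audit_syscall_entry()/audit_syscall_exit() became inline wrappers
that check for an active audit context, and arch fast paths now call the
underlying functions directly. Their C prototypes, as declared in
include/linux/audit.h in this series, are roughly:

	extern void __audit_syscall_entry(int arch, int major,
					  unsigned long a0, unsigned long a1,
					  unsigned long a2, unsigned long a3);
	extern void __audit_syscall_exit(int ret_success, long ret_value);

Under the x86-64 SysV calling convention the integer arguments travel in
%rdi, %rsi, %rdx, %rcx, %r8, %r9, which is why the hunk below moves the
audit arch into %edi, the syscall number into %rsi, and the first syscall
argument into %rdx.
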
@@ -557,22 +558,21 @@ auditsys:
 	movq %rdi,%rdx			/* 3rd arg: 1st syscall arg */
 	movq %rax,%rsi			/* 2nd arg: syscall number */
 	movl $AUDIT_ARCH_X86_64,%edi	/* 1st arg: audit arch */
-	call audit_syscall_entry
+	call __audit_syscall_entry
 	LOAD_ARGS 0			/* reload call-clobbered registers */
 	jmp system_call_fastpath
 
 	/*
-	 * Return fast path for syscall audit. Call audit_syscall_exit()
+	 * Return fast path for syscall audit. Call __audit_syscall_exit()
 	 * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
 	 * masked off.
 	 */
 sysret_audit:
 	movq RAX-ARGOFFSET(%rsp),%rsi	/* second arg, syscall return value */
-	cmpq $0,%rsi			/* is it < 0? */
-	setl %al			/* 1 if so, 0 if not */
+	cmpq $-MAX_ERRNO,%rsi		/* is it < -MAX_ERRNO? */
+	setbe %al			/* 1 if so, 0 if not */
 	movzbl %al,%edi			/* zero-extend that into %edi */
-	inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
-	call audit_syscall_exit
+	call __audit_syscall_exit
 	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
 	jmp sysret_check
 #endif	/* CONFIG_AUDITSYSCALL */
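
The sysret_audit change is more than a rename. The old code derived
AUDITSC_SUCCESS/AUDITSC_FAILURE from a plain sign test, which misclassifies
syscalls whose legitimate return value merely has the top bit set (an mmap()
address, for instance). __audit_syscall_exit() instead takes a success flag
computed much like the kernel's IS_ERR_VALUE(): only the top MAX_ERRNO values
of the unsigned range are errors. A minimal C sketch of what the cmpq/setbe
pair computes (the helper name is invented; MAX_ERRNO is 4095, from the
<linux/err.h> include added in the first hunk):

	#include <stdbool.h>
	#include <stdint.h>

	#define MAX_ERRNO	4095

	/*
	 * Error returns occupy the top MAX_ERRNO values of the unsigned
	 * range, i.e. -MAX_ERRNO..-1 when read as a signed long; anything
	 * else counts as success. The comparison below is exactly what
	 * "cmpq $-MAX_ERRNO,%rsi; setbe %al" leaves in %al.
	 */
	static bool syscall_success(uint64_t rax)
	{
		return rax <= (uint64_t)-MAX_ERRNO;
	}
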
@@ -1479,62 +1479,221 @@ ENTRY(error_exit)
 	CFI_ENDPROC
 END(error_exit)
 
+/*
+ * Test if a given stack is an NMI stack or not.
+ */
+	.macro test_in_nmi reg stack nmi_ret normal_ret
+	cmpq %\reg, \stack
+	ja \normal_ret
+	subq $EXCEPTION_STKSZ, %\reg
+	cmpq %\reg, \stack
+	jb \normal_ret
+	jmp \nmi_ret
+	.endm
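
Stripped of the macro plumbing, test_in_nmi is an inclusive range check on
the interrupted stack pointer; note that it clobbers \reg, which is left
pointing at the bottom of the NMI stack. A C rendering (names invented;
EXCEPTION_STKSZ is one 4K page in this configuration):

	#include <stdbool.h>
	#include <stdint.h>

	#define EXCEPTION_STKSZ	4096

	/*
	 * 'top' is what the caller loads into \reg (the address just above
	 * the NMI IST stack); 'sp' is the interrupted context's stack
	 * pointer from the hardware iret frame. Mirrors ja/subq/jb above.
	 */
	static bool in_nmi_stack(uint64_t top, uint64_t sp)
	{
		if (sp > top)				/* ja \normal_ret */
			return false;
		if (sp < top - EXCEPTION_STKSZ)		/* jb \normal_ret */
			return false;
		return true;				/* jmp \nmi_ret */
	}
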
 
 	/* runs on exception stack */
 ENTRY(nmi)
 	INTR_FRAME
 	PARAVIRT_ADJUST_EXCEPTION_FRAME
-	pushq_cfi $-1
+	/*
+	 * We allow breakpoints in NMIs. If a breakpoint occurs, then
+	 * the iretq it performs will take us out of NMI context.
+	 * This means that we can have nested NMIs where the next
+	 * NMI is using the top of the stack of the previous NMI. We
+	 * can't let it execute because the nested NMI will corrupt the
+	 * stack of the previous NMI. NMI handlers are not re-entrant
+	 * anyway.
+	 *
+	 * To handle this case we do the following:
+	 *  Check a special location on the stack that contains
+	 *  a variable that is set when NMIs are executing.
+	 *  The interrupted task's stack is also checked to see if it
+	 *  is an NMI stack.
+	 *  If the variable is not set and the stack is not the NMI
+	 *  stack then:
+	 *    o Set the special variable on the stack
+	 *    o Copy the interrupt frame into a "saved" location on the stack
+	 *    o Copy the interrupt frame into a "copy" location on the stack
+	 *    o Continue processing the NMI
+	 *  If the variable is set or the previous stack is the NMI stack:
+	 *    o Modify the "copy" location to jump to repeat_nmi
+	 *    o Return back to the first NMI
+	 *
+	 * Now on exit of the first NMI, we first clear the stack variable.
+	 * The NMI stack will tell any nested NMIs at that point that it is
+	 * nested. Then we pop the stack normally with iret, and if there was
+	 * a nested NMI that updated the copy interrupt stack frame, a
+	 * jump will be made to the repeat_nmi code that will handle the
+	 * second NMI.
+	 */
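
The whole scheme condenses into the following pseudocode; the struct and
helper names are invented for exposition (the real code below works against
fixed offsets on the NMI IST stack, with no C involved):

	#include <stdint.h>

	struct iret_frame { uint64_t rip, cs, rflags, rsp, ss; };

	struct nmi_stack_state {
		struct iret_frame copy;	 /* frame the final iret goes through */
		struct iret_frame saved; /* pristine copy, for repeat_nmi */
		uint64_t executing;	 /* the "NMI executing" variable */
	};

	/* Stand-ins for things the assembly gets for free. */
	extern uint64_t nmi_stack_top(void);
	extern uint64_t repeat_nmi_address(void);
	extern void do_nmi(void);
	extern _Bool in_nmi_stack(uint64_t top, uint64_t sp);

	void nmi(struct nmi_stack_state *s, struct iret_frame *hw)
	{
		if (s->executing || in_nmi_stack(nmi_stack_top(), hw->rsp)) {
			/* Nested NMI: do not run the handler. Point the
			 * preempted NMI's "copy" frame at repeat_nmi so its
			 * final iret re-enters the handler, then leave. */
			s->copy.rip = repeat_nmi_address();
			return;			/* iretq */
		}
		s->executing = 1;
		s->saved = s->copy = *hw;
		do_nmi();
		s->executing = 0;
		/* iretq through s->copy: untouched, it resumes the
		 * interrupted context; rewritten, it lands in repeat_nmi,
		 * which restores s->copy from s->saved and runs again. */
	}
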
+
+	/* Use %rdx as our temp variable throughout */
+	pushq_cfi %rdx
+
+	/*
+	 * If %cs was not the kernel segment, then the NMI triggered in user
+	 * space, which means it is definitely not nested.
+	 */
+	cmpl $__KERNEL_CS, 16(%rsp)
+	jne first_nmi
+
+	/*
+	 * Check the special variable on the stack to see if NMIs are
+	 * executing.
+	 */
+	cmpl $1, -8(%rsp)
+	je nested_nmi
+
+	/*
+	 * Now test if the previous stack was an NMI stack.
+	 * We need the double check. We check the NMI stack to satisfy the
+	 * race when the first NMI clears the variable before returning.
+	 * We check the variable because the first NMI could be in a
+	 * breakpoint routine using a breakpoint stack.
+	 */
+	lea 6*8(%rsp), %rdx
+	test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
+
+nested_nmi:
+	/*
+	 * Do nothing if we interrupted the fixup in repeat_nmi.
+	 * It's about to repeat the NMI handler, so we are fine
+	 * with ignoring this one.
+	 */
+	movq $repeat_nmi, %rdx
+	cmpq 8(%rsp), %rdx
+	ja 1f
+	movq $end_repeat_nmi, %rdx
+	cmpq 8(%rsp), %rdx
+	ja nested_nmi_out
+
+1:
+	/* Set up the interrupted NMI's stack to jump to repeat_nmi */
+	leaq -6*8(%rsp), %rdx
+	movq %rdx, %rsp
+	CFI_ADJUST_CFA_OFFSET 6*8
+	pushq_cfi $__KERNEL_DS
+	pushq_cfi %rdx
+	pushfq_cfi
+	pushq_cfi $__KERNEL_CS
+	pushq_cfi $repeat_nmi
+
+	/* Put stack back */
+	addq $(11*8), %rsp
+	CFI_ADJUST_CFA_OFFSET -11*8
+
+nested_nmi_out:
+	popq_cfi %rdx
+
+	/* No need to check faults here */
+	INTERRUPT_RETURN
+
+first_nmi:
+	/*
+	 * Because nested NMIs will use the pushed location that we
+	 * stored in rdx, we must keep that space available.
+	 * Here's what our stack frame will look like:
+	 * +-------------------------+
+	 * | original SS             |
+	 * | original Return RSP     |
+	 * | original RFLAGS         |
+	 * | original CS             |
+	 * | original RIP            |
+	 * +-------------------------+
+	 * | temp storage for rdx    |
+	 * +-------------------------+
+	 * | NMI executing variable  |
+	 * +-------------------------+
+	 * | Saved SS                |
+	 * | Saved Return RSP        |
+	 * | Saved RFLAGS            |
+	 * | Saved CS                |
+	 * | Saved RIP               |
+	 * +-------------------------+
+	 * | copied SS               |
+	 * | copied Return RSP       |
+	 * | copied RFLAGS           |
+	 * | copied CS               |
+	 * | copied RIP              |
+	 * +-------------------------+
+	 * | pt_regs                 |
+	 * +-------------------------+
+	 *
+	 * The saved RIP is used to fix up the copied RIP that a nested
+	 * NMI may zero out. The original stack frame and the temp storage
+	 * are also used by nested NMIs and cannot be trusted on exit.
+	 */
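
Read bottom-up (lowest address first), the diagram above is where the
offsets used below come from; as a hypothetical C overlay of the NMI stack
once both five-word copies have been pushed (%rsp pointing at the copied
RIP, before pt_regs is allocated):

	#include <stdint.h>

	struct nmi_stack_frame {
		uint64_t copy[5];	/*  0*8: copied RIP,CS,RFLAGS,RSP,SS  */
		uint64_t saved[5];	/*  5*8: saved  RIP,CS,RFLAGS,RSP,SS  */
		uint64_t executing;	/* 10*8: "NMI executing" variable     */
		uint64_t saved_rdx;	/* 11*8: temp storage for %rdx        */
		uint64_t orig[5];	/* 12*8: original hardware iret frame */
	};

This is why the code below reloads %rdx from 11*8(%rsp) after the copies
are made, and why nmi_restore clears the variable at 10*8(%rsp) on the way
out.
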
+	/* Set the NMI executing variable on the stack. */
+	pushq_cfi $1
+
+	/* Copy the stack frame to the Saved frame */
+	.rept 5
+	pushq_cfi 6*8(%rsp)
+	.endr
+
+	/* Make another copy, this one may be modified by nested NMIs */
+	.rept 5
+	pushq_cfi 4*8(%rsp)
+	.endr
+
+	/* Do not pop rdx, nested NMIs will corrupt it */
+	movq 11*8(%rsp), %rdx
+
+	/*
+	 * Everything below this point can be preempted by a nested
+	 * NMI if the first NMI took an exception. Repeated NMIs
+	 * caused by an exception and nested NMI will start here, and
+	 * can still be preempted by another NMI.
+	 */
+restart_nmi:
+	pushq_cfi $-1		/* ORIG_RAX: no syscall to restart */
 	subq $ORIG_RAX-R15, %rsp
 	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+	/*
+	 * Use save_paranoid to handle SWAPGS, but no need to use
+	 * paranoid_exit, as we should not be calling schedule in NMI
+	 * context, even with normal interrupts enabled. An NMI should
+	 * not be setting NEED_RESCHED or anything that normal interrupts
+	 * and exceptions might do.
+	 */
 	call save_paranoid
 	DEFAULT_FRAME 0
 	/* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
 	movq %rsp,%rdi
 	movq $-1,%rsi
 	call do_nmi
-#ifdef CONFIG_TRACE_IRQFLAGS
-	/* paranoidexit; without TRACE_IRQS_OFF */
-	/* ebx: no swapgs flag */
-	DISABLE_INTERRUPTS(CLBR_NONE)
 	testl %ebx,%ebx			/* swapgs needed? */
 	jnz nmi_restore
-	testl $3,CS(%rsp)
-	jnz nmi_userspace
 nmi_swapgs:
 	SWAPGS_UNSAFE_STACK
 nmi_restore:
 	RESTORE_ALL 8
+	/* Clear the NMI executing stack variable */
+	movq $0, 10*8(%rsp)
 	jmp irq_return
-nmi_userspace:
-	GET_THREAD_INFO(%rcx)
-	movl TI_flags(%rcx),%ebx
-	andl $_TIF_WORK_MASK,%ebx
-	jz nmi_swapgs
-	movq %rsp,%rdi			/* &pt_regs */
-	call sync_regs
-	movq %rax,%rsp			/* switch stack for scheduling */
-	testl $_TIF_NEED_RESCHED,%ebx
-	jnz nmi_schedule
-	movl %ebx,%edx			/* arg3: thread flags */
-	ENABLE_INTERRUPTS(CLBR_NONE)
-	xorl %esi,%esi			/* arg2: oldset */
-	movq %rsp,%rdi			/* arg1: &pt_regs */
-	call do_notify_resume
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	jmp nmi_userspace
-nmi_schedule:
-	ENABLE_INTERRUPTS(CLBR_ANY)
-	call schedule
-	DISABLE_INTERRUPTS(CLBR_ANY)
-	jmp nmi_userspace
-	CFI_ENDPROC
-#else
-	jmp paranoid_exit
 	CFI_ENDPROC
-#endif
 END(nmi)
 
+	/*
+	 * If an NMI hit an iret because of an exception or breakpoint,
+	 * it can lose its NMI context, and a nested NMI may come in.
+	 * In that case, the nested NMI will change the preempted NMI's
+	 * stack to jump to here when it does the final iret.
+	 */
+repeat_nmi:
+	INTR_FRAME
+	/* Update the stack variable to say we are still in NMI */
+	movq $1, 5*8(%rsp)
+
+	/* Copy the saved stack back to the copy stack */
+	.rept 5
+	pushq_cfi 4*8(%rsp)
+	.endr
+
+	jmp restart_nmi
+	CFI_ENDPROC
+end_repeat_nmi:
+
 ENTRY(ignore_sysret)
 	CFI_STARTPROC
 	mov $-ENOSYS,%eax