author     Ingo Molnar <mingo@elte.hu>    2012-02-28 04:26:07 -0500
committer  Ingo Molnar <mingo@elte.hu>    2012-02-28 04:27:36 -0500
commit     458ce2910aa83d8a2cafb489d727f7da839e73c6 (patch)
tree       28e088137eb068e5620f17eaf0ca19d1562006ed /arch/x86/kernel/entry_64.S
parent     69466466ce889cd2cbc8cda9ff1c6083f48cc7f9 (diff)
parent     586c6e7013c8cbb8c91aaa6568ec349b1dc2c691 (diff)
Merge branch 'linus' into x86/asm
Sync up the latest NMI fixes.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/entry_64.S')
-rw-r--r--    arch/x86/kernel/entry_64.S    239
1 file changed, 199 insertions(+), 40 deletions(-)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 211b2e1683f1..94c636aa7e1d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -55,6 +55,7 @@
 #include <asm/paravirt.h>
 #include <asm/ftrace.h>
 #include <asm/percpu.h>
+#include <linux/err.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
 #include <linux/elf-em.h>
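
The new <linux/err.h> include is what makes MAX_ERRNO visible to this file; the reworked sysret_audit hunk below compares the syscall return value against it. As a C restatement of that convention (a sketch: MAX_ERRNO and the IS_ERR_VALUE-style comparison do come from <linux/err.h>, but syscall_failed() is a hypothetical helper name):

#include <stdbool.h>

#define MAX_ERRNO 4095 /* as defined in <linux/err.h> */

/* A syscall return value denotes failure exactly when it lies in
 * [-MAX_ERRNO, -1], i.e. in the top MAX_ERRNO values of the unsigned
 * range, so a single unsigned comparison classifies it. */
static bool syscall_failed(long ret)
{
        return (unsigned long)ret >= (unsigned long)-MAX_ERRNO;
}
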
@@ -547,7 +548,7 @@ badsys:
 #ifdef CONFIG_AUDITSYSCALL
         /*
          * Fast path for syscall audit without full syscall trace.
-         * We just call audit_syscall_entry() directly, and then
+         * We just call __audit_syscall_entry() directly, and then
          * jump back to the normal fast path.
          */
 auditsys:
@@ -557,22 +558,21 @@ auditsys:
         movq %rdi,%rdx                  /* 3rd arg: 1st syscall arg */
         movq %rax,%rsi                  /* 2nd arg: syscall number */
         movl $AUDIT_ARCH_X86_64,%edi    /* 1st arg: audit arch */
-        call audit_syscall_entry
+        call __audit_syscall_entry
         LOAD_ARGS 0             /* reload call-clobbered registers */
         jmp system_call_fastpath
 
         /*
-         * Return fast path for syscall audit. Call audit_syscall_exit()
+         * Return fast path for syscall audit. Call __audit_syscall_exit()
          * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
          * masked off.
          */
 sysret_audit:
         movq RAX-ARGOFFSET(%rsp),%rsi   /* second arg, syscall return value */
-        cmpq $0,%rsi                    /* is it < 0? */
-        setl %al                        /* 1 if so, 0 if not */
+        cmpq $-MAX_ERRNO,%rsi           /* is it < -MAX_ERRNO? */
+        setbe %al                       /* 1 if so, 0 if not */
         movzbl %al,%edi                 /* zero-extend that into %edi */
-        inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
-        call audit_syscall_exit
+        call __audit_syscall_exit
         movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
         jmp sysret_check
 #endif  /* CONFIG_AUDITSYSCALL */
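
The dropped inc %edi reflects the new audit-exit calling convention: instead of mapping the result to AUDITSC_SUCCESS/AUDITSC_FAILURE, the fast path now hands __audit_syscall_exit() a success flag plus the raw return value. A small runnable sketch of what the cmpq/setbe/movzbl sequence leaves in %edi (assuming the 3.3-era signature __audit_syscall_exit(int success, long return_code)):

#include <stdio.h>

#define MAX_ERRNO 4095

/* setbe is an unsigned "below or equal": everything outside the errno
 * window counts as success, matching the flag passed in %edi. */
static int audit_success(long rax)
{
        return (unsigned long)rax <= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
        printf("%d\n", audit_success(0));      /* 1: plain success */
        printf("%d\n", audit_success(-2));     /* 0: -ENOENT, a failure */
        printf("%d\n", audit_success(123456)); /* 1: large positive return */
        return 0;
}
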
@@ -1479,62 +1479,221 @@ ENTRY(error_exit)
         CFI_ENDPROC
 END(error_exit)
 
+/*
+ * Test if a given stack is an NMI stack or not.
+ */
+        .macro test_in_nmi reg stack nmi_ret normal_ret
+        cmpq %\reg, \stack
+        ja \normal_ret
+        subq $EXCEPTION_STKSZ, %\reg
+        cmpq %\reg, \stack
+        jb \normal_ret
+        jmp \nmi_ret
+        .endm
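
The macro is just an unsigned range check: with %\reg holding the top of a candidate NMI stack, it branches to \nmi_ret when \stack falls within the EXCEPTION_STKSZ bytes below that top. The same predicate in C (a sketch; in_nmi_stack() is a hypothetical name):

#include <stdbool.h>

/* Mirrors test_in_nmi: sp is "in" the NMI stack iff
 * nmi_stack_top - stksz <= sp <= nmi_stack_top (unsigned compares). */
static bool in_nmi_stack(unsigned long sp, unsigned long nmi_stack_top,
                         unsigned long stksz)
{
        if (sp > nmi_stack_top)           /* ja \normal_ret */
                return false;
        if (sp < nmi_stack_top - stksz)   /* jb \normal_ret */
                return false;
        return true;                      /* jmp \nmi_ret */
}
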
 
 /* runs on exception stack */
 ENTRY(nmi)
         INTR_FRAME
         PARAVIRT_ADJUST_EXCEPTION_FRAME
-        pushq_cfi $-1
+        /*
+         * We allow breakpoints in NMIs. If a breakpoint occurs, then
+         * the iretq it performs will take us out of NMI context.
+         * This means that we can have nested NMIs where the next
+         * NMI is using the top of the stack of the previous NMI. We
+         * can't let it execute because the nested NMI will corrupt the
+         * stack of the previous NMI. NMI handlers are not re-entrant
+         * anyway.
+         *
+         * To handle this case we do the following:
+         *  Check a special location on the stack that contains
+         *  a variable that is set when NMIs are executing.
+         *  The interrupted task's stack is also checked to see if it
+         *  is an NMI stack.
+         *  If the variable is not set and the stack is not the NMI
+         *  stack then:
+         *    o Set the special variable on the stack
+         *    o Copy the interrupt frame into a "saved" location on the stack
+         *    o Copy the interrupt frame into a "copy" location on the stack
+         *    o Continue processing the NMI
+         *  If the variable is set or the previous stack is the NMI stack:
+         *    o Modify the "copy" location to jump to repeat_nmi
+         *    o Return to the first NMI
+         *
+         * On exit of the first NMI, we first clear the "NMI executing"
+         * stack variable; from that point on, the NMI stack itself tells
+         * any nested NMI that it is nested. Then we pop the stack normally
+         * with iret, and if a nested NMI updated the copy interrupt stack
+         * frame, a jump will be made to the repeat_nmi code that handles
+         * the second NMI.
+         */
+
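
In rough C, the decision tree that the next few dozen instructions implement looks like this (a sketch only; the struct, constant, and helper parameters stand in for the %rsp offsets the assembly reads):

#include <stdint.h>
#include <stdbool.h>

enum nmi_path { FIRST_NMI, NESTED_NMI };

struct iret_frame { uint64_t rip, cs, rflags, rsp, ss; };

#define KERNEL_CS_SELECTOR 0x10  /* illustrative stand-in for __KERNEL_CS */

static enum nmi_path classify_nmi(const struct iret_frame *f,
                                  bool nmi_executing_var,
                                  uint64_t nmi_stack_top, uint64_t stksz)
{
        if (f->cs != KERNEL_CS_SELECTOR)
                return FIRST_NMI;      /* came from user space: never nested */
        if (nmi_executing_var)
                return NESTED_NMI;     /* the on-stack variable is set */
        if (f->rsp <= nmi_stack_top && f->rsp >= nmi_stack_top - stksz)
                return NESTED_NMI;     /* interrupted stack is the NMI stack */
        return FIRST_NMI;
}
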
+        /* Use %rdx as our temp variable throughout */
+        pushq_cfi %rdx
+
+        /*
+         * If %cs was not the kernel segment, then the NMI triggered in user
+         * space, which means it is definitely not nested.
+         */
+        cmpl $__KERNEL_CS, 16(%rsp)
+        jne first_nmi
+
+        /*
+         * Check the special variable on the stack to see if NMIs are
+         * executing.
+         */
+        cmpl $1, -8(%rsp)
+        je nested_nmi
+
+        /*
+         * Now test if the previous stack was an NMI stack.
+         * We need the double check. We check the NMI stack to satisfy the
+         * race when the first NMI clears the variable before returning.
+         * We check the variable because the first NMI could be in a
+         * breakpoint routine using a breakpoint stack.
+         */
+        lea 6*8(%rsp), %rdx
+        test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
+
+nested_nmi:
+        /*
+         * Do nothing if we interrupted the fixup in repeat_nmi.
+         * It's about to repeat the NMI handler, so we are fine
+         * with ignoring this one.
+         */
+        movq $repeat_nmi, %rdx
+        cmpq 8(%rsp), %rdx
+        ja 1f
+        movq $end_repeat_nmi, %rdx
+        cmpq 8(%rsp), %rdx
+        ja nested_nmi_out
+
+1:
+        /* Set up the interrupted NMI's stack to jump to repeat_nmi */
+        leaq -6*8(%rsp), %rdx
+        movq %rdx, %rsp
+        CFI_ADJUST_CFA_OFFSET 6*8
+        pushq_cfi $__KERNEL_DS
+        pushq_cfi %rdx
+        pushfq_cfi
+        pushq_cfi $__KERNEL_CS
+        pushq_cfi $repeat_nmi
+
+        /* Put stack back */
+        addq $(11*8), %rsp
+        CFI_ADJUST_CFA_OFFSET -11*8
+
+nested_nmi_out:
+        popq_cfi %rdx
+
+        /* No need to check faults here */
+        INTERRUPT_RETURN
+
+first_nmi:
+        /*
+         * Because nested NMIs will use the pushed location that we
+         * stored in rdx, we must keep that space available.
+         * Here's what our stack frame will look like:
+         * +-------------------------+
+         * | original SS             |
+         * | original Return RSP     |
+         * | original RFLAGS         |
+         * | original CS             |
+         * | original RIP            |
+         * +-------------------------+
+         * | temp storage for rdx    |
+         * +-------------------------+
+         * | NMI executing variable  |
+         * +-------------------------+
+         * | Saved SS                |
+         * | Saved Return RSP        |
+         * | Saved RFLAGS            |
+         * | Saved CS                |
+         * | Saved RIP               |
+         * +-------------------------+
+         * | copied SS               |
+         * | copied Return RSP       |
+         * | copied RFLAGS           |
+         * | copied CS               |
+         * | copied RIP              |
+         * +-------------------------+
+         * | pt_regs                 |
+         * +-------------------------+
+         *
+         * The saved RIP is used to fix up the copied RIP that a nested
+         * NMI may zero out. The original stack frame and the temp storage
+         * are also used by nested NMIs and cannot be trusted on exit.
+         */
+        /* Set the NMI executing variable on the stack. */
+        pushq_cfi $1
+
+        /* Copy the stack frame to the Saved frame */
+        .rept 5
+        pushq_cfi 6*8(%rsp)
+        .endr
+
+        /* Make another copy, this one may be modified by nested NMIs */
+        .rept 5
+        pushq_cfi 4*8(%rsp)
+        .endr
+
+        /* Do not pop rdx, nested NMIs will corrupt it */
+        movq 11*8(%rsp), %rdx
+
+        /*
+         * Everything below this point can be preempted by a nested
+         * NMI if the first NMI took an exception. Repeated NMIs
+         * caused by an exception and nested NMI will start here, and
+         * can still be preempted by another NMI.
+         */
+restart_nmi:
+        pushq_cfi $-1           /* ORIG_RAX: no syscall to restart */
         subq $ORIG_RAX-R15, %rsp
         CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+        /*
+         * Use save_paranoid to handle SWAPGS, but there is no need to use
+         * paranoid_exit, as we should not be calling schedule in NMI
+         * context, even with normal interrupts enabled. An NMI should not
+         * be setting NEED_RESCHED or anything that normal interrupts and
+         * exceptions might do.
+         */
         call save_paranoid
         DEFAULT_FRAME 0
         /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
         movq %rsp,%rdi
         movq $-1,%rsi
         call do_nmi
-#ifdef CONFIG_TRACE_IRQFLAGS
-        /* paranoidexit; without TRACE_IRQS_OFF */
-        /* ebx: no swapgs flag */
-        DISABLE_INTERRUPTS(CLBR_NONE)
         testl %ebx,%ebx                 /* swapgs needed? */
         jnz nmi_restore
-        testl $3,CS(%rsp)
-        jnz nmi_userspace
 nmi_swapgs:
         SWAPGS_UNSAFE_STACK
 nmi_restore:
         RESTORE_ALL 8
+        /* Clear the NMI executing stack variable */
+        movq $0, 10*8(%rsp)
         jmp irq_return
-nmi_userspace:
-        GET_THREAD_INFO(%rcx)
-        movl TI_flags(%rcx),%ebx
-        andl $_TIF_WORK_MASK,%ebx
-        jz nmi_swapgs
-        movq %rsp,%rdi                  /* &pt_regs */
-        call sync_regs
-        movq %rax,%rsp                  /* switch stack for scheduling */
-        testl $_TIF_NEED_RESCHED,%ebx
-        jnz nmi_schedule
-        movl %ebx,%edx                  /* arg3: thread flags */
-        ENABLE_INTERRUPTS(CLBR_NONE)
-        xorl %esi,%esi                  /* arg2: oldset */
-        movq %rsp,%rdi                  /* arg1: &pt_regs */
-        call do_notify_resume
-        DISABLE_INTERRUPTS(CLBR_NONE)
-        jmp nmi_userspace
-nmi_schedule:
-        ENABLE_INTERRUPTS(CLBR_ANY)
-        call schedule
-        DISABLE_INTERRUPTS(CLBR_ANY)
-        jmp nmi_userspace
-        CFI_ENDPROC
-#else
-        jmp paranoid_exit
         CFI_ENDPROC
-#endif
 END(nmi)
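
For reference, the region that first_nmi builds below the original hardware frame can be pictured as a C struct in increasing-address order (a sketch with hypothetical names; the real code works purely with %rsp offsets, e.g. the movq $0, 10*8(%rsp) in nmi_restore reaches the nmi_executing slot once pt_regs and ORIG_RAX have been popped):

#include <stdint.h>

struct hw_iret_frame {          /* what the CPU pushes on an NMI */
        uint64_t rip, cs, rflags, rsp, ss;
};

struct nmi_nesting_area {       /* lowest address first */
        struct hw_iret_frame copy;   /* nested NMIs may redirect this RIP */
        struct hw_iret_frame saved;  /* pristine copy, used to repair "copy" */
        uint64_t nmi_executing;      /* 1 while the handler runs */
        uint64_t saved_rdx;          /* temp storage for %rdx */
        /* the original hardware iret frame sits just above this */
};
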
 
+/*
+ * If an NMI hit an iret because of an exception or breakpoint,
+ * it can lose its NMI context, and a nested NMI may come in.
+ * In that case, the nested NMI will change the preempted NMI's
+ * stack to jump to here when it does the final iret.
+ */
+repeat_nmi:
+        INTR_FRAME
+        /* Update the stack variable to say we are still in NMI */
+        movq $1, 5*8(%rsp)
+
+        /* Copy the saved stack back to the copy stack */
+        .rept 5
+        pushq_cfi 4*8(%rsp)
+        .endr
+
+        jmp restart_nmi
+        CFI_ENDPROC
+end_repeat_nmi:
+
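
Putting the two paths together: a nested NMI only rewrites the first NMI's copy frame so that its final iret lands on repeat_nmi, and repeat_nmi restores the copy frame from the saved one before rerunning the handler. In hedged C, reusing the sketch types above (hypothetical names; the real transfer happens through the rewritten iret frame, not a function call):

/* Nested path: make the first NMI "return" into repeat_nmi.
 * The assembly rebuilds all five words of the frame; RIP is the
 * essential one for this sketch. */
static void on_nested_nmi(struct nmi_nesting_area *a, uint64_t repeat_nmi_rip)
{
        a->copy.rip = repeat_nmi_rip;
}

/* repeat_nmi: mark NMI context again and rebuild the copy frame from
 * the pristine saved frame before falling into restart_nmi. */
static void on_repeat_nmi(struct nmi_nesting_area *a)
{
        a->nmi_executing = 1;
        a->copy = a->saved;
}
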
 ENTRY(ignore_sysret)
         CFI_STARTPROC
         mov $-ENOSYS,%eax