diff options
-rw-r--r-- | arch/x86/kernel/entry_64.S | 177 |
1 files changed, 177 insertions, 0 deletions
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index d1d5434e7f6a..b62aa298df7f 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -1475,11 +1475,166 @@ ENTRY(error_exit) | |||
1475 | CFI_ENDPROC | 1475 | CFI_ENDPROC |
1476 | END(error_exit) | 1476 | END(error_exit) |
1477 | 1477 | ||
1478 | /* | ||
1479 | * Test if a given stack is an NMI stack or not. | ||
1480 | */ | ||
1481 | .macro test_in_nmi reg stack nmi_ret normal_ret | ||
1482 | cmpq %\reg, \stack | ||
1483 | ja \normal_ret | ||
1484 | subq $EXCEPTION_STKSZ, %\reg | ||
1485 | cmpq %\reg, \stack | ||
1486 | jb \normal_ret | ||
1487 | jmp \nmi_ret | ||
1488 | .endm | ||
1478 | 1489 | ||
1479 | /* runs on exception stack */ | 1490 | /* runs on exception stack */ |
1480 | ENTRY(nmi) | 1491 | ENTRY(nmi) |
1481 | INTR_FRAME | 1492 | INTR_FRAME |
1482 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1493 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1494 | /* | ||
1495 | * We allow breakpoints in NMIs. If a breakpoint occurs, then | ||
1496 | * the iretq it performs will take us out of NMI context. | ||
1497 | * This means that we can have nested NMIs where the next | ||
1498 | * NMI is using the top of the stack of the previous NMI. We | ||
1499 | * can't let it execute because the nested NMI will corrupt the | ||
1500 | * stack of the previous NMI. NMI handlers are not re-entrant | ||
1501 | * anyway. | ||
1502 | * | ||
1503 | * To handle this case we do the following: | ||
1504 | * Check a special location on the stack that contains | ||
1505 | * a variable that is set when NMIs are executing. | ||
1506 | * The interrupted task's stack is also checked to see if it | ||
1507 | * is an NMI stack. | ||
1508 | * If the variable is not set and the stack is not the NMI | ||
1509 | * stack then: | ||
1510 | * o Set the special variable on the stack | ||
1511 | * o Copy the interrupt frame into a "saved" location on the stack | ||
1512 | * o Copy the interrupt frame into a "copy" location on the stack | ||
1513 | * o Continue processing the NMI | ||
1514 | * If the variable is set or the previous stack is the NMI stack: | ||
1515 | * o Modify the "copy" location to jump to the repeat_nmi | ||
1516 | * o return back to the first NMI | ||
1517 | * | ||
1518 | * Now on exit of the first NMI, we first clear the stack variable. | ||
1519 | * The NMI stack will tell any nested NMIs at that point that it is | ||
1520 | * nested. Then we pop the stack normally with iret, and if there was | ||
1521 | * a nested NMI that updated the copy interrupt stack frame, a | ||
1522 | * jump will be made to the repeat_nmi code that will handle the second | ||
1523 | * NMI. | ||
1524 | */ | ||
1525 | |||
1526 | /* Use %rdx as our temp variable throughout */ | ||
1527 | pushq_cfi %rdx | ||
1528 | |||
1529 | /* | ||
1530 | * Check the special variable on the stack to see if NMIs are | ||
1531 | * executing. | ||
1532 | */ | ||
1533 | cmp $1, -8(%rsp) | ||
1534 | je nested_nmi | ||
1535 | |||
1536 | /* | ||
1537 | * Now test if the previous stack was an NMI stack. | ||
1538 | * We need the double check. We check the NMI stack to satisfy the | ||
1539 | * race when the first NMI clears the variable before returning. | ||
1540 | * We check the variable because the first NMI could be in a | ||
1541 | * breakpoint routine using a breakpoint stack. | ||
1542 | */ | ||
1543 | lea 6*8(%rsp), %rdx | ||
1544 | test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi | ||
1545 | |||
1546 | nested_nmi: | ||
1547 | /* | ||
1548 | * Do nothing if we interrupted the fixup in repeat_nmi. | ||
1549 | * It's about to repeat the NMI handler, so we are fine | ||
1550 | * with ignoring this one. | ||
1551 | */ | ||
1552 | movq $repeat_nmi, %rdx | ||
1553 | cmpq 8(%rsp), %rdx | ||
1554 | ja 1f | ||
1555 | movq $end_repeat_nmi, %rdx | ||
1556 | cmpq 8(%rsp), %rdx | ||
1557 | ja nested_nmi_out | ||
1558 | |||
1559 | 1: | ||
1560 | /* Set up the interrupted NMI's stack to jump to repeat_nmi */ | ||
1561 | leaq -6*8(%rsp), %rdx | ||
1562 | movq %rdx, %rsp | ||
1563 | CFI_ADJUST_CFA_OFFSET 6*8 | ||
1564 | pushq_cfi $__KERNEL_DS | ||
1565 | pushq_cfi %rdx | ||
1566 | pushfq_cfi | ||
1567 | pushq_cfi $__KERNEL_CS | ||
1568 | pushq_cfi $repeat_nmi | ||
1569 | |||
1570 | /* Put stack back */ | ||
1571 | addq $(11*8), %rsp | ||
1572 | CFI_ADJUST_CFA_OFFSET -11*8 | ||
1573 | |||
1574 | nested_nmi_out: | ||
1575 | popq_cfi %rdx | ||
1576 | |||
1577 | /* No need to check faults here */ | ||
1578 | INTERRUPT_RETURN | ||
1579 | |||
1580 | first_nmi: | ||
1581 | /* | ||
1582 | * Because nested NMIs will use the pushed location that we | ||
1583 | * stored rdx in, we must keep that space available. | ||
1584 | * Here's what our stack frame will look like: | ||
1585 | * +-------------------------+ | ||
1586 | * | original SS | | ||
1587 | * | original Return RSP | | ||
1588 | * | original RFLAGS | | ||
1589 | * | original CS | | ||
1590 | * | original RIP | | ||
1591 | * +-------------------------+ | ||
1592 | * | temp storage for rdx | | ||
1593 | * +-------------------------+ | ||
1594 | * | NMI executing variable | | ||
1595 | * +-------------------------+ | ||
1596 | * | Saved SS | | ||
1597 | * | Saved Return RSP | | ||
1598 | * | Saved RFLAGS | | ||
1599 | * | Saved CS | | ||
1600 | * | Saved RIP | | ||
1601 | * +-------------------------+ | ||
1602 | * | copied SS | | ||
1603 | * | copied Return RSP | | ||
1604 | * | copied RFLAGS | | ||
1605 | * | copied CS | | ||
1606 | * | copied RIP | | ||
1607 | * +-------------------------+ | ||
1608 | * | pt_regs | | ||
1609 | * +-------------------------+ | ||
1610 | * | ||
1611 | * The saved RIP is used to fix up the copied RIP that a nested | ||
1612 | * NMI may overwrite. The original stack frame and the temp storage | ||
1613 | * are also used by nested NMIs and cannot be trusted on exit. | ||
1614 | */ | ||
1615 | /* Set the NMI executing variable on the stack. */ | ||
1616 | pushq_cfi $1 | ||
1617 | |||
1618 | /* Copy the stack frame to the Saved frame */ | ||
1619 | .rept 5 | ||
1620 | pushq_cfi 6*8(%rsp) | ||
1621 | .endr | ||
1622 | |||
1623 | /* Make another copy, this one may be modified by nested NMIs */ | ||
1624 | .rept 5 | ||
1625 | pushq_cfi 4*8(%rsp) | ||
1626 | .endr | ||
1627 | |||
1628 | /* Do not pop rdx, nested NMIs will corrupt it */ | ||
1629 | movq 11*8(%rsp), %rdx | ||
1630 | |||
1631 | /* | ||
1632 | * Everything below this point can be preempted by a nested | ||
1633 | * NMI if the first NMI took an exception. Repeated NMIs | ||
1634 | * caused by an exception and nested NMI will start here, and | ||
1635 | * can still be preempted by another NMI. | ||
1636 | */ | ||
1637 | restart_nmi: | ||
1483 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ | 1638 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
1484 | subq $ORIG_RAX-R15, %rsp | 1639 | subq $ORIG_RAX-R15, %rsp |
1485 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 | 1640 | CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 |
@@ -1502,10 +1657,32 @@ nmi_swapgs: | |||
1502 | SWAPGS_UNSAFE_STACK | 1657 | SWAPGS_UNSAFE_STACK |
1503 | nmi_restore: | 1658 | nmi_restore: |
1504 | RESTORE_ALL 8 | 1659 | RESTORE_ALL 8 |
1660 | /* Clear the NMI executing stack variable */ | ||
1661 | movq $0, 10*8(%rsp) | ||
1505 | jmp irq_return | 1662 | jmp irq_return |
1506 | CFI_ENDPROC | 1663 | CFI_ENDPROC |
1507 | END(nmi) | 1664 | END(nmi) |
1508 | 1665 | ||
1666 | /* | ||
1667 | * If an NMI hit an iret because of an exception or breakpoint, | ||
1668 | * it can lose its NMI context, and a nested NMI may come in. | ||
1669 | * In that case, the nested NMI will change the preempted NMI's | ||
1670 | * stack to jump to here when it does the final iret. | ||
1671 | */ | ||
1672 | repeat_nmi: | ||
1673 | INTR_FRAME | ||
1674 | /* Update the stack variable to say we are still in NMI */ | ||
1675 | movq $1, 5*8(%rsp) | ||
1676 | |||
1677 | /* copy the saved stack back to copy stack */ | ||
1678 | .rept 5 | ||
1679 | pushq_cfi 4*8(%rsp) | ||
1680 | .endr | ||
1681 | |||
1682 | jmp restart_nmi | ||
1683 | CFI_ENDPROC | ||
1684 | end_repeat_nmi: | ||
1685 | |||
1509 | ENTRY(ignore_sysret) | 1686 | ENTRY(ignore_sysret) |
1510 | CFI_STARTPROC | 1687 | CFI_STARTPROC |
1511 | mov $-ENOSYS,%eax | 1688 | mov $-ENOSYS,%eax |