Diffstat (limited to 'arch/tile/kernel/intvec_32.S')
-rw-r--r--  arch/tile/kernel/intvec_32.S | 175
1 file changed, 43 insertions, 132 deletions
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index fffcfa6b3a62..72ade79b621b 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -851,14 +851,27 @@ STD_ENTRY(interrupt_return)
 	/* Check to see if there is any work to do before returning to user. */
 	{
 	 addi   r29, r32, THREAD_INFO_FLAGS_OFFSET
-	 moveli r28, lo16(_TIF_ALLWORK_MASK)
+	 moveli r1, lo16(_TIF_ALLWORK_MASK)
 	}
 	{
 	 lw     r29, r29
-	 auli   r28, r28, ha16(_TIF_ALLWORK_MASK)
+	 auli   r1, r1, ha16(_TIF_ALLWORK_MASK)
 	}
-	and     r28, r29, r28
-	bnz     r28, .Lwork_pending
+	and     r1, r29, r1
+	bzt     r1, .Lrestore_all
+
+	/*
+	 * Make sure we have all the registers saved for signal
+	 * handling or single-step. Call out to C code to figure out
+	 * exactly what we need to do for each flag bit, then if
+	 * necessary, reload the flags and recheck.
+	 */
+	push_extra_callee_saves r0
+	{
+	 PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+	 jal do_work_pending
+	}
+	bnz r0, .Lresume_userspace
 
 	/*
 	 * In the NMI case we
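
The per-flag handling that used to be open-coded in assembly (removed in the next hunk) now happens in a C helper: the new comment says the code calls out to C to decide what to do for each flag bit, and the "bnz r0, .Lresume_userspace" re-enters the resume path whenever the helper returns nonzero. Purely as a sketch reconstructed from the assembly this patch deletes below — not the actual arch/tile implementation, and with the helpers' C signatures assumed — a do_work_pending()-style routine could look like this:

/* Illustrative sketch only; signatures of do_async_page_fault(),
 * do_signal(), and single_step_once() are assumptions. */
int do_work_pending(struct pt_regs *regs, unsigned int flags)
{
	if (flags & _TIF_NEED_RESCHED) {
		schedule();
		return 1;		/* caller reloads flags and rechecks */
	}
	if (flags & _TIF_ASYNC_TLB) {
		do_async_page_fault(regs);
		return 1;
	}
	if (flags & _TIF_SIGPENDING) {
		do_signal(regs);
		return 1;
	}
	if (flags & _TIF_SINGLESTEP) {
		if (user_mode(regs))	/* don't single-step kernel code */
			single_step_once(regs);
		return 0;		/* nothing left to recheck */
	}
	panic("thread_info allwork flags unhandled on userspace resume: %#x",
	      flags);
}

A nonzero return maps directly onto the old assembly's "reload the flags and check again" jumps back to .Lresume_userspace.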
@@ -1099,99 +1112,6 @@ STD_ENTRY(interrupt_return)
 	pop_reg r50
 	pop_reg r51, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(51)
 	j .Lcontinue_restore_regs
-
-.Lwork_pending:
-	/* Mask the reschedule flag */
-	andi    r28, r29, _TIF_NEED_RESCHED
-
-	{
-	 /*
-	  * If the NEED_RESCHED flag is called, we call schedule(), which
-	  * may drop this context right here and go do something else.
-	  * On return, jump back to .Lresume_userspace and recheck.
-	  */
-	 bz     r28, .Lasync_tlb
-
-	 /* Mask the async-tlb flag */
-	 andi   r28, r29, _TIF_ASYNC_TLB
-	}
-
-	jal     schedule
-	FEEDBACK_REENTER(interrupt_return)
-
-	/* Reload the flags and check again */
-	j       .Lresume_userspace
-
-.Lasync_tlb:
-	{
-	 bz     r28, .Lneed_sigpending
-
-	 /* Mask the sigpending flag */
-	 andi   r28, r29, _TIF_SIGPENDING
-	}
-
-	PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
-	jal     do_async_page_fault
-	FEEDBACK_REENTER(interrupt_return)
-
-	/*
-	 * Go restart the "resume userspace" process. We may have
-	 * fired a signal, and we need to disable interrupts again.
-	 */
-	j       .Lresume_userspace
-
-.Lneed_sigpending:
-	/*
-	 * At this point we are either doing signal handling or single-step,
-	 * so either way make sure we have all the registers saved.
-	 */
-	push_extra_callee_saves r0
-
-	{
-	 /* If no signal pending, skip to singlestep check */
-	 bz     r28, .Lneed_singlestep
-
-	 /* Mask the singlestep flag */
-	 andi   r28, r29, _TIF_SINGLESTEP
-	}
-
-	jal     do_signal
-	FEEDBACK_REENTER(interrupt_return)
-
-	/* Reload the flags and check again */
-	j       .Lresume_userspace
-
-.Lneed_singlestep:
-	{
-	 /* Get a pointer to the EX1 field */
-	 PTREGS_PTR(r29, PTREGS_OFFSET_EX1)
-
-	 /* If we get here, our bit must be set. */
-	 bz     r28, .Lwork_confusion
-	}
-	/* If we are in priv mode, don't single step */
-	lw      r28, r29
-	andi    r28, r28, SPR_EX_CONTEXT_1_1__PL_MASK  /* mask off ICS */
-	bnz     r28, .Lrestore_all
-
-	/* Allow interrupts within the single step code */
-	TRACE_IRQS_ON  /* Note: clobbers registers r0-r29 */
-	IRQ_ENABLE(r20, r21)
-
-	/* try to single-step the current instruction */
-	PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
-	jal     single_step_once
-	FEEDBACK_REENTER(interrupt_return)
-
-	/* Re-disable interrupts. TRACE_IRQS_OFF in .Lrestore_all. */
-	IRQ_DISABLE(r20,r21)
-
-	j .Lrestore_all
-
-.Lwork_confusion:
-	move    r0, r28
-	panic   "thread_info allwork flags unhandled on userspace resume: %#x"
-
 STD_ENDPROC(interrupt_return)
 
 	/*
@@ -1550,7 +1470,10 @@ STD_ENTRY(_sys_clone)
  * We place it in the __HEAD section to ensure it is relatively
  * near to the intvec_SWINT_1 code (reachable by a conditional branch).
  *
- * Must match register usage in do_page_fault().
+ * Our use of ATOMIC_LOCK_REG here must match do_page_fault_ics().
+ *
+ * As we do in lib/atomic_asm_32.S, we bypass a store if the value we
+ * would store is the same as the value we just loaded.
 */
 	__HEAD
 	.align 64
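
The new comment lines state two constraints: the ATOMIC_LOCK_REG choice must stay in sync with do_page_fault_ics(), and, as in lib/atomic_asm_32.S, the store is bypassed when the value to be stored equals the value just loaded (that is what the "seq"/"bbs ... .Lcmpxchg32_nostore" pair added further down implements). A minimal user-space C sketch of the store-bypass idea, with a pthread mutex standing in for the kernel's hashed "tns" lock — illustrative only, not the kernel code:

#include <pthread.h>

/* Stand-in for the hashed "tns" lock used by the real fast path. */
static pthread_mutex_t atomic_lock = PTHREAD_MUTEX_INITIALIZER;

static unsigned int cmpxchg32_sketch(unsigned int *p, unsigned int oldval,
				     unsigned int newval)
{
	unsigned int val;

	pthread_mutex_lock(&atomic_lock);
	val = *p;
	/* Bypass the store when the word already holds the value we
	 * would write, so the cache line is not dirtied needlessly. */
	if (val == oldval && newval != val)
		*p = newval;
	pthread_mutex_unlock(&atomic_lock);
	return val;		/* caller checks val == oldval for success */
}

Skipping the redundant store keeps the line clean in the writer's cache and avoids invalidating copies held by other tiles.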
@@ -1611,17 +1534,7 @@ ENTRY(sys_cmpxchg)
 	{
 	 shri   r20, r25, 32 - ATOMIC_HASH_L1_SHIFT
 	 slt_u  r23, r0, r23
-
-	 /*
-	  * Ensure that the TLB is loaded before we take out the lock.
-	  * On TILEPro, this will start fetching the value all the way
-	  * into our L1 as well (and if it gets modified before we
-	  * grab the lock, it will be invalidated from our cache
-	  * before we reload it). On tile64, we'll start fetching it
-	  * into our L1 if we're the home, and if we're not, we'll
-	  * still at least start fetching it into the home's L2.
-	  */
-	 lw     r26, r0
+	 lw     r26, r0  /* see comment in the "#else" for the "lw r26". */
 	}
 	{
 	 s2a    r21, r20, r21
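
The comment deleted here (the diff keeps only a pointer to the matching comment in the "#else" branch) explains why "lw r26, r0" is issued before the lock is taken: the load pulls in the TLB entry and starts fetching the user word toward the local cache, so those misses happen before the lock is held rather than inside the critical section. A minimal C sketch of the same trick; the function name is made up:

/* Touch the word before taking the lock so a TLB miss or cache fill
 * happens outside the critical section (illustrative only). */
static inline void warm_before_lock(const unsigned int *uaddr)
{
	/* Load and discard: warms the translation and the cache;
	 * the value itself cannot be trusted yet. */
	(void)*(volatile const unsigned int *)uaddr;
}

As the removed comment notes, the prefetched line may still be invalidated before the lock is taken; the warm-up is purely a performance aid, and the real load is redone once the lock is held.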
@@ -1637,18 +1550,9 @@ ENTRY(sys_cmpxchg)
 	 bbs    r23, .Lcmpxchg64
 	 andi   r23, r0, 7       /* Precompute alignment for cmpxchg64. */
 	}
-
 	{
-	 /*
-	  * We very carefully align the code that actually runs with
-	  * the lock held (nine bundles) so that we know it is all in
-	  * the icache when we start. This instruction (the jump) is
-	  * at the start of the first cache line, address zero mod 64;
-	  * we jump to somewhere in the second cache line to issue the
-	  * tns, then jump back to finish up.
-	  */
 	 s2a    ATOMIC_LOCK_REG_NAME, r25, r21
-	 j      .Lcmpxchg32_tns
+	 j      .Lcmpxchg32_tns  /* see comment in the #else for the jump. */
 	}
 
 #else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
@@ -1713,24 +1617,25 @@ ENTRY(sys_cmpxchg)
 	{
 	 /*
 	  * We very carefully align the code that actually runs with
-	  * the lock held (nine bundles) so that we know it is all in
+	  * the lock held (twelve bundles) so that we know it is all in
 	  * the icache when we start. This instruction (the jump) is
 	  * at the start of the first cache line, address zero mod 64;
-	  * we jump to somewhere in the second cache line to issue the
-	  * tns, then jump back to finish up.
+	  * we jump to the very end of the second cache line to get that
+	  * line loaded in the icache, then fall through to issue the tns
+	  * in the third cache line, at which point it's all cached.
+	  * Note that is for performance, not correctness.
 	  */
 	 j      .Lcmpxchg32_tns
 	}
 
 #endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
 
-ENTRY(__sys_cmpxchg_grab_lock)
+/* Symbol for do_page_fault_ics() to use to compare against the PC. */
+.global __sys_cmpxchg_grab_lock
+__sys_cmpxchg_grab_lock:
 
 	/*
 	 * Perform the actual cmpxchg or atomic_update.
-	 * Note that the system <arch/atomic.h> header relies on
-	 * atomic_update() to always perform an "mf", so don't make
-	 * it optional or conditional without modifying that code.
 	 */
 .Ldo_cmpxchg32:
 	{
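
The ENTRY() wrapper becomes a bare global label plus a comment saying the symbol exists only for do_page_fault_ics() to compare against the PC. Purely as a hypothetical illustration of that kind of check — this is not the actual do_page_fault_ics() logic — a fault handler could test whether the trapping PC falls at or beyond the point where the atomic lock is grabbed:

extern char __sys_cmpxchg_grab_lock[];	/* label defined in the assembly above */

/* Hypothetical check: did the trapping PC land at or past the point
 * where the atomic lock gets grabbed? (Illustrative only.) */
static int pc_is_at_or_past_lock_grab(unsigned long pc)
{
	return pc >= (unsigned long)__sys_cmpxchg_grab_lock;
}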
@@ -1748,10 +1653,13 @@ ENTRY(sys_cmpxchg)
 	}
 	{
 	 mvnz   r24, r23, r25    /* Use atomic_update value if appropriate. */
-	 bbns   r22, .Lcmpxchg32_mismatch
+	 bbns   r22, .Lcmpxchg32_nostore
 	}
+	seq     r22, r24, r21    /* Are we storing the value we loaded? */
+	bbs     r22, .Lcmpxchg32_nostore
 	sw      r0, r24
 
+	/* The following instruction is the start of the second cache line. */
 	/* Do slow mtspr here so the following "mf" waits less. */
 	{
 	 move   sp, r27
@@ -1759,7 +1667,6 @@ ENTRY(sys_cmpxchg)
 	}
 	mf
 
-	/* The following instruction is the start of the second cache line. */
 	{
 	 move   r0, r21
 	 sw     ATOMIC_LOCK_REG_NAME, zero
@@ -1767,7 +1674,7 @@ ENTRY(sys_cmpxchg)
 	iret
 
 	/* Duplicated code here in the case where we don't overlap "mf" */
-.Lcmpxchg32_mismatch:
+.Lcmpxchg32_nostore:
 	{
 	 move   r0, r21
 	 sw     ATOMIC_LOCK_REG_NAME, zero
@@ -1783,8 +1690,6 @@ ENTRY(sys_cmpxchg)
	 * and for 64-bit cmpxchg. We provide it as a macro and put
	 * it into both versions. We can't share the code literally
	 * since it depends on having the right branch-back address.
-	 * Note that the first few instructions should share the cache
-	 * line with the second half of the actual locked code.
	 */
	.macro cmpxchg_lock, bitwidth
 
@@ -1810,7 +1715,7 @@ ENTRY(sys_cmpxchg)
 	}
 	/*
 	 * The preceding instruction is the last thing that must be
-	 * on the second cache line.
+	 * hot in the icache before we do the "tns" above.
 	 */
 
 #ifdef CONFIG_SMP
@@ -1841,6 +1746,12 @@ ENTRY(sys_cmpxchg)
 	.endm
 
 .Lcmpxchg32_tns:
+	/*
+	 * This is the last instruction on the second cache line.
+	 * The nop here loads the second line, then we fall through
+	 * to the tns to load the third line before we take the lock.
+	 */
+	nop
 	cmpxchg_lock 32
 
 	/*