Diffstat (limited to 'arch/tile/kernel/intvec_32.S')
-rw-r--r--  arch/tile/kernel/intvec_32.S | 175
1 file changed, 43 insertions(+), 132 deletions(-)
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index fffcfa6b3a62..72ade79b621b 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -851,14 +851,27 @@ STD_ENTRY(interrupt_return)
 	/* Check to see if there is any work to do before returning to user. */
 	{
 	 addi   r29, r32, THREAD_INFO_FLAGS_OFFSET
-	 moveli r28, lo16(_TIF_ALLWORK_MASK)
+	 moveli r1, lo16(_TIF_ALLWORK_MASK)
 	}
 	{
 	 lw     r29, r29
-	 auli   r28, r28, ha16(_TIF_ALLWORK_MASK)
+	 auli   r1, r1, ha16(_TIF_ALLWORK_MASK)
 	}
-	and    r28, r29, r28
-	bnz    r28, .Lwork_pending
+	and    r1, r29, r1
+	bzt    r1, .Lrestore_all
+
+	/*
+	 * Make sure we have all the registers saved for signal
+	 * handling or single-step.  Call out to C code to figure out
+	 * exactly what we need to do for each flag bit, then if
+	 * necessary, reload the flags and recheck.
+	 */
+	push_extra_callee_saves r0
+	{
+	 PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+	 jal    do_work_pending
+	}
+	bnz    r0, .Lresume_userspace
 
 	/*
 	 * In the NMI case we
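
The hunk above replaces the per-flag assembly dispatch (removed in the next hunk) with a single call into C: do_work_pending() decides what each flag bit requires and reports whether the flags must be reloaded and rechecked, which is what the "bnz r0, .Lresume_userspace" tests. As an orientation only -- an illustrative sketch, not the actual arch/tile/kernel/process.c implementation, and with stand-in flag values -- such a helper has roughly this shape:

	/* Illustrative sketch only; flag values are stand-ins, not the real ones. */
	#define _TIF_NEED_RESCHED  (1u << 0)
	#define _TIF_ASYNC_TLB     (1u << 1)
	#define _TIF_SIGPENDING    (1u << 2)
	#define _TIF_SINGLESTEP    (1u << 3)

	struct pt_regs;  /* opaque here */

	/* Return nonzero to make the caller reload thread_info->flags and recheck. */
	static int example_do_work_pending(struct pt_regs *regs, unsigned int flags)
	{
		(void)regs;
		if (flags & _TIF_NEED_RESCHED)
			return 1;   /* would call schedule(), then recheck */
		if (flags & _TIF_ASYNC_TLB)
			return 1;   /* would retry the deferred page fault, then recheck */
		if (flags & _TIF_SIGPENDING)
			return 1;   /* would deliver the signal, then recheck */
		if (flags & _TIF_SINGLESTEP)
			return 0;   /* would arm single-step, then restore registers */
		return 0;
	}
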
@@ -1099,99 +1112,6 @@ STD_ENTRY(interrupt_return)
 	pop_reg r50
 	pop_reg r51, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(51)
 	j      .Lcontinue_restore_regs
-
-.Lwork_pending:
-	/* Mask the reschedule flag */
-	andi   r28, r29, _TIF_NEED_RESCHED
-
-	{
-	 /*
-	  * If the NEED_RESCHED flag is called, we call schedule(), which
-	  * may drop this context right here and go do something else.
-	  * On return, jump back to .Lresume_userspace and recheck.
-	  */
-	 bz     r28, .Lasync_tlb
-
-	 /* Mask the async-tlb flag */
-	 andi   r28, r29, _TIF_ASYNC_TLB
-	}
-
-	jal    schedule
-	FEEDBACK_REENTER(interrupt_return)
-
-	/* Reload the flags and check again */
-	j      .Lresume_userspace
-
-.Lasync_tlb:
-	{
-	 bz     r28, .Lneed_sigpending
-
-	 /* Mask the sigpending flag */
-	 andi   r28, r29, _TIF_SIGPENDING
-	}
-
-	PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
-	jal    do_async_page_fault
-	FEEDBACK_REENTER(interrupt_return)
-
-	/*
-	 * Go restart the "resume userspace" process.  We may have
-	 * fired a signal, and we need to disable interrupts again.
-	 */
-	j      .Lresume_userspace
-
-.Lneed_sigpending:
-	/*
-	 * At this point we are either doing signal handling or single-step,
-	 * so either way make sure we have all the registers saved.
-	 */
-	push_extra_callee_saves r0
-
-	{
-	 /* If no signal pending, skip to singlestep check */
-	 bz     r28, .Lneed_singlestep
-
-	 /* Mask the singlestep flag */
-	 andi   r28, r29, _TIF_SINGLESTEP
-	}
-
-	jal    do_signal
-	FEEDBACK_REENTER(interrupt_return)
-
-	/* Reload the flags and check again */
-	j      .Lresume_userspace
-
-.Lneed_singlestep:
-	{
-	 /* Get a pointer to the EX1 field */
-	 PTREGS_PTR(r29, PTREGS_OFFSET_EX1)
-
-	 /* If we get here, our bit must be set. */
-	 bz     r28, .Lwork_confusion
-	}
-	/* If we are in priv mode, don't single step */
-	lw     r28, r29
-	andi   r28, r28, SPR_EX_CONTEXT_1_1__PL_MASK  /* mask off ICS */
-	bnz    r28, .Lrestore_all
-
-	/* Allow interrupts within the single step code */
-	TRACE_IRQS_ON  /* Note: clobbers registers r0-r29 */
-	IRQ_ENABLE(r20, r21)
-
-	/* try to single-step the current instruction */
-	PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
-	jal    single_step_once
-	FEEDBACK_REENTER(interrupt_return)
-
-	/* Re-disable interrupts.  TRACE_IRQS_OFF in .Lrestore_all. */
-	IRQ_DISABLE(r20,r21)
-
-	j      .Lrestore_all
-
-.Lwork_confusion:
-	move   r0, r28
-	panic  "thread_info allwork flags unhandled on userspace resume: %#x"
-
 	STD_ENDPROC(interrupt_return)
 
 	/*
@@ -1550,7 +1470,10 @@ STD_ENTRY(_sys_clone)
  * We place it in the __HEAD section to ensure it is relatively
  * near to the intvec_SWINT_1 code (reachable by a conditional branch).
  *
- * Must match register usage in do_page_fault().
+ * Our use of ATOMIC_LOCK_REG here must match do_page_fault_ics().
+ *
+ * As we do in lib/atomic_asm_32.S, we bypass a store if the value we
+ * would store is the same as the value we just loaded.
  */
 	__HEAD
 	.align 64
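
The store bypass the new comment refers to is easier to see in C. The sketch below is illustrative only: the real code is the assembly later in this file and in lib/atomic_asm_32.S, and it runs under the tns-based lock, which is omitted here. The point is simply that the store is skipped whenever it would not change memory, so the cache line never has to be dirtied for a no-op update:

	/*
	 * Illustrative sketch of the "bypass the store" idea.  Not atomic by
	 * itself; in the kernel the surrounding lock makes the sequence safe.
	 */
	static unsigned int example_cmpxchg32(unsigned int *p,
	                                      unsigned int oldval,
	                                      unsigned int newval)
	{
		unsigned int cur = *p;          /* the value we just loaded */

		if (cur == oldval && newval != cur)
			*p = newval;            /* store only if it changes memory */
		return cur;                     /* caller checks this against oldval */
	}
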
@@ -1611,17 +1534,7 @@ ENTRY(sys_cmpxchg)
 	{
 	 shri   r20, r25, 32 - ATOMIC_HASH_L1_SHIFT
 	 slt_u  r23, r0, r23
-
-	 /*
-	  * Ensure that the TLB is loaded before we take out the lock.
-	  * On TILEPro, this will start fetching the value all the way
-	  * into our L1 as well (and if it gets modified before we
-	  * grab the lock, it will be invalidated from our cache
-	  * before we reload it).  On tile64, we'll start fetching it
-	  * into our L1 if we're the home, and if we're not, we'll
-	  * still at least start fetching it into the home's L2.
-	  */
-	 lw     r26, r0
+	 lw     r26, r0  /* see comment in the "#else" for the "lw r26". */
 	}
 	{
 	 s2a    r21, r20, r21
@@ -1637,18 +1550,9 @@ ENTRY(sys_cmpxchg)
 	 bbs    r23, .Lcmpxchg64
 	 andi   r23, r0, 7       /* Precompute alignment for cmpxchg64. */
 	}
-
 	{
-	 /*
-	  * We very carefully align the code that actually runs with
-	  * the lock held (nine bundles) so that we know it is all in
-	  * the icache when we start.  This instruction (the jump) is
-	  * at the start of the first cache line, address zero mod 64;
-	  * we jump to somewhere in the second cache line to issue the
-	  * tns, then jump back to finish up.
-	  */
 	 s2a    ATOMIC_LOCK_REG_NAME, r25, r21
-	 j      .Lcmpxchg32_tns
+	 j      .Lcmpxchg32_tns   /* see comment in the #else for the jump. */
 	}
 
 #else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
@@ -1713,24 +1617,25 @@ ENTRY(sys_cmpxchg)
 	{
 	 /*
 	  * We very carefully align the code that actually runs with
-	  * the lock held (nine bundles) so that we know it is all in
+	  * the lock held (twelve bundles) so that we know it is all in
 	  * the icache when we start.  This instruction (the jump) is
 	  * at the start of the first cache line, address zero mod 64;
-	  * we jump to somewhere in the second cache line to issue the
-	  * tns, then jump back to finish up.
+	  * we jump to the very end of the second cache line to get that
+	  * line loaded in the icache, then fall through to issue the tns
+	  * in the third cache line, at which point it's all cached.
+	  * Note that is for performance, not correctness.
 	  */
 	 j      .Lcmpxchg32_tns
 	}
 
 #endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
 
-ENTRY(__sys_cmpxchg_grab_lock)
+/* Symbol for do_page_fault_ics() to use to compare against the PC. */
+.global __sys_cmpxchg_grab_lock
+__sys_cmpxchg_grab_lock:
 
 	/*
 	 * Perform the actual cmpxchg or atomic_update.
-	 * Note that the system <arch/atomic.h> header relies on
-	 * atomic_update() to always perform an "mf", so don't make
-	 * it optional or conditional without modifying that code.
 	 */
 .Ldo_cmpxchg32:
 	{
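
The plain .global label that replaces ENTRY() above exists so that do_page_fault_ics() can compare a faulting PC against the address at which the atomic lock is taken. The helper below is a hypothetical illustration of that kind of check; only the label name comes from this diff, and it does not reproduce the real fault-handler policy:

	/* Hypothetical illustration; only the label name comes from the diff. */
	extern char __sys_cmpxchg_grab_lock[];   /* label exported by intvec_32.S */

	/* Nonzero if the faulting PC is at or past the point where the lock is taken. */
	static int example_pc_past_grab_lock(unsigned long pc)
	{
		return pc >= (unsigned long)__sys_cmpxchg_grab_lock;
	}
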
@@ -1748,10 +1653,13 @@ ENTRY(sys_cmpxchg)
 	}
 	{
 	 mvnz   r24, r23, r25    /* Use atomic_update value if appropriate. */
-	 bbns   r22, .Lcmpxchg32_mismatch
+	 bbns   r22, .Lcmpxchg32_nostore
 	}
+	seq    r22, r24, r21    /* Are we storing the value we loaded? */
+	bbs    r22, .Lcmpxchg32_nostore
 	sw     r0, r24
 
+	/* The following instruction is the start of the second cache line. */
 	/* Do slow mtspr here so the following "mf" waits less. */
 	{
 	 move   sp, r27
@@ -1759,7 +1667,6 @@ ENTRY(sys_cmpxchg)
 	}
 	mf
 
-	/* The following instruction is the start of the second cache line. */
 	{
 	 move   r0, r21
 	 sw     ATOMIC_LOCK_REG_NAME, zero
@@ -1767,7 +1674,7 @@ ENTRY(sys_cmpxchg)
 	iret
 
 	/* Duplicated code here in the case where we don't overlap "mf" */
-.Lcmpxchg32_mismatch:
+.Lcmpxchg32_nostore:
 	{
 	 move   r0, r21
 	 sw     ATOMIC_LOCK_REG_NAME, zero
@@ -1783,8 +1690,6 @@ ENTRY(sys_cmpxchg)
  * and for 64-bit cmpxchg.  We provide it as a macro and put
  * it into both versions.  We can't share the code literally
  * since it depends on having the right branch-back address.
- * Note that the first few instructions should share the cache
- * line with the second half of the actual locked code.
  */
 	.macro  cmpxchg_lock, bitwidth
 
@@ -1810,7 +1715,7 @@ ENTRY(sys_cmpxchg)
 	}
 	/*
 	 * The preceding instruction is the last thing that must be
-	 * on the second cache line.
+	 * hot in the icache before we do the "tns" above.
 	 */
 
 #ifdef CONFIG_SMP
@@ -1841,6 +1746,12 @@ ENTRY(sys_cmpxchg)
 	.endm
 
 .Lcmpxchg32_tns:
+	/*
+	 * This is the last instruction on the second cache line.
+	 * The nop here loads the second line, then we fall through
+	 * to the tns to load the third line before we take the lock.
+	 */
+	nop
 	cmpxchg_lock 32
 
 	/*