Diffstat (limited to 'arch/tile')
-rw-r--r--  arch/tile/kernel/intvec_32.S   61
-rw-r--r--  arch/tile/lib/atomic_asm_32.S   2
2 files changed, 27 insertions, 36 deletions
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index f35c3124fa62..72ade79b621b 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -1470,7 +1470,10 @@ STD_ENTRY(_sys_clone)
  * We place it in the __HEAD section to ensure it is relatively
  * near to the intvec_SWINT_1 code (reachable by a conditional branch).
  *
- * Must match register usage in do_page_fault().
+ * Our use of ATOMIC_LOCK_REG here must match do_page_fault_ics().
+ *
+ * As we do in lib/atomic_asm_32.S, we bypass a store if the value we
+ * would store is the same as the value we just loaded.
  */
 __HEAD
 .align 64
@@ -1531,17 +1534,7 @@ ENTRY(sys_cmpxchg)
 {
 shri r20, r25, 32 - ATOMIC_HASH_L1_SHIFT
 slt_u r23, r0, r23
-
-/*
- * Ensure that the TLB is loaded before we take out the lock.
- * On TILEPro, this will start fetching the value all the way
- * into our L1 as well (and if it gets modified before we
- * grab the lock, it will be invalidated from our cache
- * before we reload it). On tile64, we'll start fetching it
- * into our L1 if we're the home, and if we're not, we'll
- * still at least start fetching it into the home's L2.
- */
-lw r26, r0
+lw r26, r0 /* see comment in the "#else" for the "lw r26". */
 }
 {
 s2a r21, r20, r21
@@ -1557,18 +1550,9 @@ ENTRY(sys_cmpxchg)
 bbs r23, .Lcmpxchg64
 andi r23, r0, 7 /* Precompute alignment for cmpxchg64. */
 }
-
 {
-/*
- * We very carefully align the code that actually runs with
- * the lock held (nine bundles) so that we know it is all in
- * the icache when we start. This instruction (the jump) is
- * at the start of the first cache line, address zero mod 64;
- * we jump to somewhere in the second cache line to issue the
- * tns, then jump back to finish up.
- */
 s2a ATOMIC_LOCK_REG_NAME, r25, r21
-j .Lcmpxchg32_tns
+j .Lcmpxchg32_tns /* see comment in the #else for the jump. */
 }
 
 #else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
@@ -1633,24 +1617,25 @@ ENTRY(sys_cmpxchg)
 {
 /*
  * We very carefully align the code that actually runs with
- * the lock held (nine bundles) so that we know it is all in
+ * the lock held (twelve bundles) so that we know it is all in
  * the icache when we start. This instruction (the jump) is
  * at the start of the first cache line, address zero mod 64;
- * we jump to somewhere in the second cache line to issue the
- * tns, then jump back to finish up.
+ * we jump to the very end of the second cache line to get that
+ * line loaded in the icache, then fall through to issue the tns
+ * in the third cache line, at which point it's all cached.
+ * Note that is for performance, not correctness.
  */
 j .Lcmpxchg32_tns
 }
 
 #endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
 
-ENTRY(__sys_cmpxchg_grab_lock)
+/* Symbol for do_page_fault_ics() to use to compare against the PC. */
+.global __sys_cmpxchg_grab_lock
+__sys_cmpxchg_grab_lock:
 
 /*
  * Perform the actual cmpxchg or atomic_update.
- * Note that the system <arch/atomic.h> header relies on
- * atomic_update() to always perform an "mf", so don't make
- * it optional or conditional without modifying that code.
  */
 .Ldo_cmpxchg32:
 {
@@ -1668,10 +1653,13 @@ ENTRY(sys_cmpxchg)
 }
 {
 mvnz r24, r23, r25 /* Use atomic_update value if appropriate. */
-bbns r22, .Lcmpxchg32_mismatch
+bbns r22, .Lcmpxchg32_nostore
 }
+seq r22, r24, r21 /* Are we storing the value we loaded? */
+bbs r22, .Lcmpxchg32_nostore
 sw r0, r24
 
+/* The following instruction is the start of the second cache line. */
 /* Do slow mtspr here so the following "mf" waits less. */
 {
 move sp, r27
@@ -1679,7 +1667,6 @@ ENTRY(sys_cmpxchg)
 }
 mf
 
-/* The following instruction is the start of the second cache line. */
 {
 move r0, r21
 sw ATOMIC_LOCK_REG_NAME, zero
@@ -1687,7 +1674,7 @@ ENTRY(sys_cmpxchg)
 iret
 
 /* Duplicated code here in the case where we don't overlap "mf" */
-.Lcmpxchg32_mismatch:
+.Lcmpxchg32_nostore:
 {
 move r0, r21
 sw ATOMIC_LOCK_REG_NAME, zero
@@ -1703,8 +1690,6 @@ ENTRY(sys_cmpxchg)
  * and for 64-bit cmpxchg. We provide it as a macro and put
  * it into both versions. We can't share the code literally
  * since it depends on having the right branch-back address.
- * Note that the first few instructions should share the cache
- * line with the second half of the actual locked code.
  */
 .macro cmpxchg_lock, bitwidth
 
@@ -1730,7 +1715,7 @@ ENTRY(sys_cmpxchg)
 }
 /*
  * The preceding instruction is the last thing that must be
- * on the second cache line.
+ * hot in the icache before we do the "tns" above.
  */
 
 #ifdef CONFIG_SMP
@@ -1761,6 +1746,12 @@ ENTRY(sys_cmpxchg)
 .endm
 
 .Lcmpxchg32_tns:
+/*
+ * This is the last instruction on the second cache line.
+ * The nop here loads the second line, then we fall through
+ * to the tns to load the third line before we take the lock.
+ */
+nop
 cmpxchg_lock 32
 
 /*
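For readers who don't read TILEPro assembly: the change above makes the locked 32-bit path skip its store when the word already holds the value it would write (the new seq/bbs pair branching to .Lcmpxchg32_nostore). Below is a minimal user-space C sketch of that idea; a pthread mutex stands in for the hashed "tns" lock, and the function and variable names are illustrative, not the kernel's.

#include <stdint.h>
#include <pthread.h>

/*
 * Illustrative sketch only -- the real path is the assembly above,
 * running with a hashed "tns" lock held and interrupts handled
 * specially; a pthread mutex stands in for that lock here.
 */
static pthread_mutex_t fake_atomic_lock = PTHREAD_MUTEX_INITIALIZER;

uint32_t sketch_cmpxchg32(uint32_t *addr, uint32_t expected, uint32_t desired)
{
	uint32_t old, val;

	pthread_mutex_lock(&fake_atomic_lock);   /* stands in for the tns */
	old = *addr;
	val = (old == expected) ? desired : old; /* pick the value to store */
	if (val != old)       /* the new check: are we storing what we loaded? */
		*addr = val;  /* only store when the value actually changes */
	pthread_mutex_unlock(&fake_atomic_lock); /* sw ATOMIC_LOCK_REG_NAME, zero */
	return old;
}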
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S
index 82f64cc63658..24448734f6f1 100644
--- a/arch/tile/lib/atomic_asm_32.S
+++ b/arch/tile/lib/atomic_asm_32.S
@@ -59,7 +59,7 @@
  * bad kernel addresses).
  *
  * Note that if the value we would store is the same as what we
- * loaded, we bypass the load. Other platforms with true atomics can
+ * loaded, we bypass the store. Other platforms with true atomics can
  * make the guarantee that a non-atomic __clear_bit(), for example,
  * can safely race with an atomic test_and_set_bit(); this example is
  * from bit_spinlock.h in slub_lock() / slub_unlock(). We can't do
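The bit_spinlock.h scenario that comment refers to can be sketched the same way as above; the names below are illustrative, and the mutex-emulated test_and_set_bit() is only an analogy for the tile fast path, not the kernel's implementation.

#include <pthread.h>

static pthread_mutex_t fake_atomic_lock = PTHREAD_MUTEX_INITIALIZER;

/* Plain, non-atomic clear, standing in for __clear_bit(). */
static void sketch_clear_bit(int bit, unsigned long *word)
{
	*word &= ~(1UL << bit);
}

/* Lock-emulated test_and_set_bit() with the store bypass. */
static int sketch_test_and_set_bit(int bit, unsigned long *word)
{
	unsigned long old, val;

	pthread_mutex_lock(&fake_atomic_lock);
	old = *word;
	val = old | (1UL << bit);
	/*
	 * While a bit_spin_lock()-style caller spins here, the bit is
	 * already set, so val == old.  Skipping the store means the
	 * spinner never writes a stale word back, so it cannot undo a
	 * racing, non-atomic sketch_clear_bit() done by the lock
	 * holder's unlock path -- the property the comment above is
	 * describing.
	 */
	if (val != old)
		*word = val;
	pthread_mutex_unlock(&fake_atomic_lock);
	return (old >> bit) & 1;
}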