diff options
author | Christoph Hellwig <hch@infradead.org> | 2008-08-13 02:18:07 -0400 |
---|---|---|
committer | Lachlan McIlroy <lachlan@redback.melbourne.sgi.com> | 2008-08-13 02:18:07 -0400 |
commit | e1cccd917be7364f81b5dc4e33ee3a6e0db21a99 (patch) | |
tree | 9722a113d77695d886e6d2c1bc504e98cdb97b67 /fs/xfs | |
parent | 1550d0b0b08bc34c0c37a86bd884b1a70782104e (diff) |
[XFS] kill xfs_lock_dir_and_entry
When multiple inodes are locked in XFS, it happens in order of the inode
number, with everything but the first inode trylocked if any of the
previous inodes is in the AIL.
Except for the sorting of the inodes, this logic is implemented in
xfs_lock_inodes, but it is also partially duplicated in
xfs_lock_dir_and_entry, in a particularly stupid way that adds a lock
roundtrip if the inode ordering is not optimal.
This patch adds a new helper, xfs_lock_two_inodes, that takes two inodes and
locks them in the optimal way according to the above locking protocol,
and uses it in all places that want to lock two inodes.
The only caller of xfs_lock_inodes is xfs_rename which might lock up to
four inodes.
SGI-PV: 981498
SGI-Modid: xfs-linux-melb:xfs-kern:31772a
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Donald Douwsma <donaldd@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r-- | fs/xfs/xfs_dfrag.c | 14 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_vnodeops.c | 162 |
3 files changed, 44 insertions, 133 deletions
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index d92407842e3b..5ce91a00425f 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
@@ -128,7 +128,6 @@ xfs_swap_extents( | |||
128 | xfs_swapext_t *sxp) | 128 | xfs_swapext_t *sxp) |
129 | { | 129 | { |
130 | xfs_mount_t *mp; | 130 | xfs_mount_t *mp; |
131 | xfs_inode_t *ips[2]; | ||
132 | xfs_trans_t *tp; | 131 | xfs_trans_t *tp; |
133 | xfs_bstat_t *sbp = &sxp->sx_stat; | 132 | xfs_bstat_t *sbp = &sxp->sx_stat; |
134 | bhv_vnode_t *vp, *tvp; | 133 | bhv_vnode_t *vp, *tvp; |
@@ -153,16 +152,7 @@ xfs_swap_extents( | |||
153 | vp = VFS_I(ip); | 152 | vp = VFS_I(ip); |
154 | tvp = VFS_I(tip); | 153 | tvp = VFS_I(tip); |
155 | 154 | ||
156 | /* Lock in i_ino order */ | 155 | xfs_lock_two_inodes(ip, tip, lock_flags); |
157 | if (ip->i_ino < tip->i_ino) { | ||
158 | ips[0] = ip; | ||
159 | ips[1] = tip; | ||
160 | } else { | ||
161 | ips[0] = tip; | ||
162 | ips[1] = ip; | ||
163 | } | ||
164 | |||
165 | xfs_lock_inodes(ips, 2, lock_flags); | ||
166 | locked = 1; | 156 | locked = 1; |
167 | 157 | ||
168 | /* Verify that both files have the same format */ | 158 | /* Verify that both files have the same format */ |
@@ -265,7 +255,7 @@ xfs_swap_extents( | |||
265 | locked = 0; | 255 | locked = 0; |
266 | goto error0; | 256 | goto error0; |
267 | } | 257 | } |
268 | xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL); | 258 | xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); |
269 | 259 | ||
270 | /* | 260 | /* |
271 | * Count the number of extended attribute blocks | 261 | * Count the number of extended attribute blocks |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 4088951230aa..ec9f454b464e 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -531,6 +531,7 @@ void xfs_iflush_all(struct xfs_mount *); | |||
531 | void xfs_ichgtime(xfs_inode_t *, int); | 531 | void xfs_ichgtime(xfs_inode_t *, int); |
532 | xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); | 532 | xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); |
533 | void xfs_lock_inodes(xfs_inode_t **, int, uint); | 533 | void xfs_lock_inodes(xfs_inode_t **, int, uint); |
534 | void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); | ||
534 | 535 | ||
535 | void xfs_synchronize_atime(xfs_inode_t *); | 536 | void xfs_synchronize_atime(xfs_inode_t *); |
536 | void xfs_mark_inode_dirty_sync(xfs_inode_t *); | 537 | void xfs_mark_inode_dirty_sync(xfs_inode_t *); |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index c5dc7ea85260..077c86b6cb22 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -1708,111 +1708,6 @@ std_return: | |||
1708 | } | 1708 | } |
1709 | 1709 | ||
1710 | #ifdef DEBUG | 1710 | #ifdef DEBUG |
1711 | /* | ||
1712 | * Some counters to see if (and how often) we are hitting some deadlock | ||
1713 | * prevention code paths. | ||
1714 | */ | ||
1715 | |||
1716 | int xfs_rm_locks; | ||
1717 | int xfs_rm_lock_delays; | ||
1718 | int xfs_rm_attempts; | ||
1719 | #endif | ||
1720 | |||
1721 | /* | ||
1722 | * The following routine will lock the inodes associated with the | ||
1723 | * directory and the named entry in the directory. The locks are | ||
1724 | * acquired in increasing inode number. | ||
1725 | * | ||
1726 | * If the entry is "..", then only the directory is locked. The | ||
1727 | * vnode ref count will still include that from the .. entry in | ||
1728 | * this case. | ||
1729 | * | ||
1730 | * There is a deadlock we need to worry about. If the locked directory is | ||
1731 | * in the AIL, it might be blocking up the log. The next inode we lock | ||
1732 | * could be already locked by another thread waiting for log space (e.g | ||
1733 | * a permanent log reservation with a long running transaction (see | ||
1734 | * xfs_itruncate_finish)). To solve this, we must check if the directory | ||
1735 | * is in the ail and use lock_nowait. If we can't lock, we need to | ||
1736 | * drop the inode lock on the directory and try again. xfs_iunlock will | ||
1737 | * potentially push the tail if we were holding up the log. | ||
1738 | */ | ||
1739 | STATIC int | ||
1740 | xfs_lock_dir_and_entry( | ||
1741 | xfs_inode_t *dp, | ||
1742 | xfs_inode_t *ip) /* inode of entry 'name' */ | ||
1743 | { | ||
1744 | int attempts; | ||
1745 | xfs_ino_t e_inum; | ||
1746 | xfs_inode_t *ips[2]; | ||
1747 | xfs_log_item_t *lp; | ||
1748 | |||
1749 | #ifdef DEBUG | ||
1750 | xfs_rm_locks++; | ||
1751 | #endif | ||
1752 | attempts = 0; | ||
1753 | |||
1754 | again: | ||
1755 | xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); | ||
1756 | |||
1757 | e_inum = ip->i_ino; | ||
1758 | |||
1759 | xfs_itrace_ref(ip); | ||
1760 | |||
1761 | /* | ||
1762 | * We want to lock in increasing inum. Since we've already | ||
1763 | * acquired the lock on the directory, we may need to release | ||
1764 | * if if the inum of the entry turns out to be less. | ||
1765 | */ | ||
1766 | if (e_inum > dp->i_ino) { | ||
1767 | /* | ||
1768 | * We are already in the right order, so just | ||
1769 | * lock on the inode of the entry. | ||
1770 | * We need to use nowait if dp is in the AIL. | ||
1771 | */ | ||
1772 | |||
1773 | lp = (xfs_log_item_t *)dp->i_itemp; | ||
1774 | if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { | ||
1775 | if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { | ||
1776 | attempts++; | ||
1777 | #ifdef DEBUG | ||
1778 | xfs_rm_attempts++; | ||
1779 | #endif | ||
1780 | |||
1781 | /* | ||
1782 | * Unlock dp and try again. | ||
1783 | * xfs_iunlock will try to push the tail | ||
1784 | * if the inode is in the AIL. | ||
1785 | */ | ||
1786 | |||
1787 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | ||
1788 | |||
1789 | if ((attempts % 5) == 0) { | ||
1790 | delay(1); /* Don't just spin the CPU */ | ||
1791 | #ifdef DEBUG | ||
1792 | xfs_rm_lock_delays++; | ||
1793 | #endif | ||
1794 | } | ||
1795 | goto again; | ||
1796 | } | ||
1797 | } else { | ||
1798 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
1799 | } | ||
1800 | } else if (e_inum < dp->i_ino) { | ||
1801 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | ||
1802 | |||
1803 | ips[0] = ip; | ||
1804 | ips[1] = dp; | ||
1805 | xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL); | ||
1806 | } | ||
1807 | /* else e_inum == dp->i_ino */ | ||
1808 | /* This can happen if we're asked to lock /x/.. | ||
1809 | * the entry is "..", which is also the parent directory. | ||
1810 | */ | ||
1811 | |||
1812 | return 0; | ||
1813 | } | ||
1814 | |||
1815 | #ifdef DEBUG | ||
1816 | int xfs_locked_n; | 1711 | int xfs_locked_n; |
1817 | int xfs_small_retries; | 1712 | int xfs_small_retries; |
1818 | int xfs_middle_retries; | 1713 | int xfs_middle_retries; |
@@ -1946,6 +1841,45 @@ again: | |||
1946 | #endif | 1841 | #endif |
1947 | } | 1842 | } |
1948 | 1843 | ||
1844 | void | ||
1845 | xfs_lock_two_inodes( | ||
1846 | xfs_inode_t *ip0, | ||
1847 | xfs_inode_t *ip1, | ||
1848 | uint lock_mode) | ||
1849 | { | ||
1850 | xfs_inode_t *temp; | ||
1851 | int attempts = 0; | ||
1852 | xfs_log_item_t *lp; | ||
1853 | |||
1854 | ASSERT(ip0->i_ino != ip1->i_ino); | ||
1855 | |||
1856 | if (ip0->i_ino > ip1->i_ino) { | ||
1857 | temp = ip0; | ||
1858 | ip0 = ip1; | ||
1859 | ip1 = temp; | ||
1860 | } | ||
1861 | |||
1862 | again: | ||
1863 | xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0)); | ||
1864 | |||
1865 | /* | ||
1866 | * If the first lock we have locked is in the AIL, we must TRY to get | ||
1867 | * the second lock. If we can't get it, we must release the first one | ||
1868 | * and try again. | ||
1869 | */ | ||
1870 | lp = (xfs_log_item_t *)ip0->i_itemp; | ||
1871 | if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { | ||
1872 | if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) { | ||
1873 | xfs_iunlock(ip0, lock_mode); | ||
1874 | if ((++attempts % 5) == 0) | ||
1875 | delay(1); /* Don't just spin the CPU */ | ||
1876 | goto again; | ||
1877 | } | ||
1878 | } else { | ||
1879 | xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1)); | ||
1880 | } | ||
1881 | } | ||
1882 | |||
1949 | int | 1883 | int |
1950 | xfs_remove( | 1884 | xfs_remove( |
1951 | xfs_inode_t *dp, | 1885 | xfs_inode_t *dp, |
@@ -2018,9 +1952,7 @@ xfs_remove( | |||
2018 | goto out_trans_cancel; | 1952 | goto out_trans_cancel; |
2019 | } | 1953 | } |
2020 | 1954 | ||
2021 | error = xfs_lock_dir_and_entry(dp, ip); | 1955 | xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); |
2022 | if (error) | ||
2023 | goto out_trans_cancel; | ||
2024 | 1956 | ||
2025 | /* | 1957 | /* |
2026 | * At this point, we've gotten both the directory and the entry | 1958 | * At this point, we've gotten both the directory and the entry |
@@ -2047,9 +1979,6 @@ xfs_remove( | |||
2047 | } | 1979 | } |
2048 | } | 1980 | } |
2049 | 1981 | ||
2050 | /* | ||
2051 | * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. | ||
2052 | */ | ||
2053 | XFS_BMAP_INIT(&free_list, &first_block); | 1982 | XFS_BMAP_INIT(&free_list, &first_block); |
2054 | error = xfs_dir_removename(tp, dp, name, ip->i_ino, | 1983 | error = xfs_dir_removename(tp, dp, name, ip->i_ino, |
2055 | &first_block, &free_list, resblks); | 1984 | &first_block, &free_list, resblks); |
@@ -2155,7 +2084,6 @@ xfs_link( | |||
2155 | { | 2084 | { |
2156 | xfs_mount_t *mp = tdp->i_mount; | 2085 | xfs_mount_t *mp = tdp->i_mount; |
2157 | xfs_trans_t *tp; | 2086 | xfs_trans_t *tp; |
2158 | xfs_inode_t *ips[2]; | ||
2159 | int error; | 2087 | int error; |
2160 | xfs_bmap_free_t free_list; | 2088 | xfs_bmap_free_t free_list; |
2161 | xfs_fsblock_t first_block; | 2089 | xfs_fsblock_t first_block; |
@@ -2203,15 +2131,7 @@ xfs_link( | |||
2203 | goto error_return; | 2131 | goto error_return; |
2204 | } | 2132 | } |
2205 | 2133 | ||
2206 | if (sip->i_ino < tdp->i_ino) { | 2134 | xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); |
2207 | ips[0] = sip; | ||
2208 | ips[1] = tdp; | ||
2209 | } else { | ||
2210 | ips[0] = tdp; | ||
2211 | ips[1] = sip; | ||
2212 | } | ||
2213 | |||
2214 | xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL); | ||
2215 | 2135 | ||
2216 | /* | 2136 | /* |
2217 | * Increment vnode ref counts since xfs_trans_commit & | 2137 | * Increment vnode ref counts since xfs_trans_commit & |