aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2010-08-23 21:42:41 -0400
committerDave Chinner <david@fromorbit.com>2010-08-23 21:42:41 -0400
commit5b3eed756cd37255cad1181bd86bfd0977e97953 (patch)
tree3d6b178c346d5980dd650336c107abd9d75ee584
parentd17c701ce6a548a92f7f8a3cec20299465f36ee3 (diff)
xfs: ensure we mark all inodes in a freed cluster XFS_ISTALE
Under heavy load parallel metadata loads (e.g. dbench), we can fail to mark all the inodes in a cluster being freed as XFS_ISTALE as we skip inodes we cannot get the XFS_ILOCK_EXCL or the flush lock on. When this happens and the inode cluster buffer has already been marked stale and freed, inode reclaim can try to write the inode out as it is dirty and not marked stale. This can result in writing th metadata to an freed extent, or in the case it has already been overwritten trigger a magic number check failure and return an EUCLEAN error such as: Filesystem "ram0": inode 0x442ba1 background reclaim flush failed with 117 Fix this by ensuring that we hoover up all in memory inodes in the cluster and mark them XFS_ISTALE when freeing the cluster. Cc: <stable@kernel.org> Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
-rw-r--r--fs/xfs/xfs_inode.c49
1 files changed, 26 insertions, 23 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 68415cb4f23c..34798f391c49 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1914,6 +1914,11 @@ xfs_iunlink_remove(
1914 return 0; 1914 return 0;
1915} 1915}
1916 1916
1917/*
1918 * A big issue when freeing the inode cluster is is that we _cannot_ skip any
1919 * inodes that are in memory - they all must be marked stale and attached to
1920 * the cluster buffer.
1921 */
1917STATIC void 1922STATIC void
1918xfs_ifree_cluster( 1923xfs_ifree_cluster(
1919 xfs_inode_t *free_ip, 1924 xfs_inode_t *free_ip,
@@ -1945,8 +1950,6 @@ xfs_ifree_cluster(
1945 } 1950 }
1946 1951
1947 for (j = 0; j < nbufs; j++, inum += ninodes) { 1952 for (j = 0; j < nbufs; j++, inum += ninodes) {
1948 int found = 0;
1949
1950 blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum), 1953 blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
1951 XFS_INO_TO_AGBNO(mp, inum)); 1954 XFS_INO_TO_AGBNO(mp, inum));
1952 1955
@@ -1965,7 +1968,9 @@ xfs_ifree_cluster(
1965 /* 1968 /*
1966 * Walk the inodes already attached to the buffer and mark them 1969 * Walk the inodes already attached to the buffer and mark them
1967 * stale. These will all have the flush locks held, so an 1970 * stale. These will all have the flush locks held, so an
1968 * in-memory inode walk can't lock them. 1971 * in-memory inode walk can't lock them. By marking them all
1972 * stale first, we will not attempt to lock them in the loop
1973 * below as the XFS_ISTALE flag will be set.
1969 */ 1974 */
1970 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 1975 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
1971 while (lip) { 1976 while (lip) {
@@ -1977,11 +1982,11 @@ xfs_ifree_cluster(
1977 &iip->ili_flush_lsn, 1982 &iip->ili_flush_lsn,
1978 &iip->ili_item.li_lsn); 1983 &iip->ili_item.li_lsn);
1979 xfs_iflags_set(iip->ili_inode, XFS_ISTALE); 1984 xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
1980 found++;
1981 } 1985 }
1982 lip = lip->li_bio_list; 1986 lip = lip->li_bio_list;
1983 } 1987 }
1984 1988
1989
1985 /* 1990 /*
1986 * For each inode in memory attempt to add it to the inode 1991 * For each inode in memory attempt to add it to the inode
1987 * buffer and set it up for being staled on buffer IO 1992 * buffer and set it up for being staled on buffer IO
@@ -1993,6 +1998,7 @@ xfs_ifree_cluster(
1993 * even trying to lock them. 1998 * even trying to lock them.
1994 */ 1999 */
1995 for (i = 0; i < ninodes; i++) { 2000 for (i = 0; i < ninodes; i++) {
2001retry:
1996 read_lock(&pag->pag_ici_lock); 2002 read_lock(&pag->pag_ici_lock);
1997 ip = radix_tree_lookup(&pag->pag_ici_root, 2003 ip = radix_tree_lookup(&pag->pag_ici_root,
1998 XFS_INO_TO_AGINO(mp, (inum + i))); 2004 XFS_INO_TO_AGINO(mp, (inum + i)));
@@ -2003,38 +2009,36 @@ xfs_ifree_cluster(
2003 continue; 2009 continue;
2004 } 2010 }
2005 2011
2006 /* don't try to lock/unlock the current inode */ 2012 /*
2013 * Don't try to lock/unlock the current inode, but we
2014 * _cannot_ skip the other inodes that we did not find
2015 * in the list attached to the buffer and are not
2016 * already marked stale. If we can't lock it, back off
2017 * and retry.
2018 */
2007 if (ip != free_ip && 2019 if (ip != free_ip &&
2008 !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { 2020 !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
2009 read_unlock(&pag->pag_ici_lock); 2021 read_unlock(&pag->pag_ici_lock);
2010 continue; 2022 delay(1);
2023 goto retry;
2011 } 2024 }
2012 read_unlock(&pag->pag_ici_lock); 2025 read_unlock(&pag->pag_ici_lock);
2013 2026
2014 if (!xfs_iflock_nowait(ip)) { 2027 xfs_iflock(ip);
2015 if (ip != free_ip)
2016 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2017 continue;
2018 }
2019
2020 xfs_iflags_set(ip, XFS_ISTALE); 2028 xfs_iflags_set(ip, XFS_ISTALE);
2021 if (xfs_inode_clean(ip)) {
2022 ASSERT(ip != free_ip);
2023 xfs_ifunlock(ip);
2024 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2025 continue;
2026 }
2027 2029
2030 /*
2031 * we don't need to attach clean inodes or those only
2032 * with unlogged changes (which we throw away, anyway).
2033 */
2028 iip = ip->i_itemp; 2034 iip = ip->i_itemp;
2029 if (!iip) { 2035 if (!iip || xfs_inode_clean(ip)) {
2030 /* inode with unlogged changes only */
2031 ASSERT(ip != free_ip); 2036 ASSERT(ip != free_ip);
2032 ip->i_update_core = 0; 2037 ip->i_update_core = 0;
2033 xfs_ifunlock(ip); 2038 xfs_ifunlock(ip);
2034 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2039 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2035 continue; 2040 continue;
2036 } 2041 }
2037 found++;
2038 2042
2039 iip->ili_last_fields = iip->ili_format.ilf_fields; 2043 iip->ili_last_fields = iip->ili_format.ilf_fields;
2040 iip->ili_format.ilf_fields = 0; 2044 iip->ili_format.ilf_fields = 0;
@@ -2049,8 +2053,7 @@ xfs_ifree_cluster(
2049 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2053 xfs_iunlock(ip, XFS_ILOCK_EXCL);
2050 } 2054 }
2051 2055
2052 if (found) 2056 xfs_trans_stale_inode_buf(tp, bp);
2053 xfs_trans_stale_inode_buf(tp, bp);
2054 xfs_trans_binval(tp, bp); 2057 xfs_trans_binval(tp, bp);
2055 } 2058 }
2056 2059