author	Brian Foster <bfoster@redhat.com>	2017-01-09 10:38:38 -0500
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2017-01-12 05:39:40 -0500
commit	b49ef758f6003a7ed0afb37b33fc96e238752497 (patch)
tree	ea48a14b27d8a48279f4afe36ca31393fa79e01d /fs
parent	63fa793e757dfc08c09865d68936ce3d67a00047 (diff)
xfs: fix unbalanced inode reclaim flush locking
commit 98efe8af1c9ffac47e842b7a75ded903e2f028da upstream.

Filesystem shutdown testing on an older distro kernel has uncovered an
imbalanced locking pattern for the inode flush lock in
xfs_reclaim_inode(). Specifically, there is a double unlock sequence
between the call to xfs_iflush_abort() and xfs_reclaim_inode() at the
"reclaim:" label.

This actually does not cause obvious problems on current kernels due to
the current flush lock implementation. Older kernels use a counting
based flush lock mechanism, however, which effectively breaks the lock
indefinitely when an already unlocked flush lock is repeatedly
unlocked. Though this only currently occurs on filesystem shutdown, it
has reproduced the effect of elevating an fs shutdown to a system-wide
crash or hang.

As it turns out, the flush lock is not actually required for the
reclaim logic in xfs_reclaim_inode() because by that time we have
already cycled the flush lock once while holding ILOCK_EXCL. Therefore,
remove the additional flush lock/unlock cycle around the 'reclaim:'
label and update branches into this label to release the flush lock
where appropriate. Add an assert to xfs_ifunlock() to help prevent
future occurrences of the same problem.

Reported-by: Zorro Lang <zlang@redhat.com>
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
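Why the double unlock is benign on current kernels but catastrophic on
older ones comes down to the lock implementation. Below is a minimal
user-space sketch (hypothetical code, not the actual XFS implementation
in either era) contrasting a bit-based flush lock, where a second
unlock is an idempotent no-op, with a counting-based one, where it
silently breaks mutual exclusion:

#include <stdio.h>

/* Bit-based lock (current kernels): unlock just clears the bit, so a
 * second unlock is a harmless no-op. */
static int flush_bit;

static int bit_trylock(void)
{
	if (flush_bit)
		return 0;
	flush_bit = 1;
	return 1;
}

static void bit_unlock(void)
{
	flush_bit = 0;			/* idempotent */
}

/* Counting-based lock (older kernels): count 1 == unlocked, 0 == locked.
 * An extra unlock pushes the count to 2, so two holders can acquire it
 * at once -- mutual exclusion is broken indefinitely, matching the
 * shutdown-to-crash escalation described above. */
static int flush_count = 1;

static int cnt_trylock(void)
{
	if (flush_count <= 0)
		return 0;
	flush_count--;
	return 1;
}

static void cnt_unlock(void)
{
	flush_count++;
}

int main(void)
{
	/* Simulate the imbalance: one lock, two unlocks. */
	bit_trylock();
	bit_unlock();
	bit_unlock();			/* no-op, lock state still sane */

	cnt_trylock();
	cnt_unlock();
	cnt_unlock();			/* count is now 2 */

	printf("bit lock reacquired: %d\n", bit_trylock());
	printf("count lock acquired twice: %d %d\n",
	       cnt_trylock(), cnt_trylock());	/* both succeed: broken */
	return 0;
}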
Diffstat (limited to 'fs')
-rw-r--r--	fs/xfs/xfs_icache.c	27
-rw-r--r--	fs/xfs/xfs_inode.h	11
2 files changed, 20 insertions(+), 18 deletions(-)
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 1b4861f5d3d8..9c3e5c6ddf20 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -123,7 +123,6 @@ __xfs_inode_free(
 {
 	/* asserts to verify all state is correct here */
 	ASSERT(atomic_read(&ip->i_pincount) == 0);
-	ASSERT(!xfs_isiflocked(ip));
 	XFS_STATS_DEC(ip->i_mount, vn_active);
 
 	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
@@ -133,6 +132,8 @@ void
 xfs_inode_free(
 	struct xfs_inode	*ip)
 {
+	ASSERT(!xfs_isiflocked(ip));
+
 	/*
 	 * Because we use RCU freeing we need to ensure the inode always
 	 * appears to be reclaimed with an invalid inode number when in the
@@ -981,6 +982,7 @@ restart:
 
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
 		xfs_iunpin_wait(ip);
+		/* xfs_iflush_abort() drops the flush lock */
 		xfs_iflush_abort(ip, false);
 		goto reclaim;
 	}
@@ -989,10 +991,10 @@ restart:
 			goto out_ifunlock;
 		xfs_iunpin_wait(ip);
 	}
-	if (xfs_iflags_test(ip, XFS_ISTALE))
-		goto reclaim;
-	if (xfs_inode_clean(ip))
+	if (xfs_iflags_test(ip, XFS_ISTALE) || xfs_inode_clean(ip)) {
+		xfs_ifunlock(ip);
 		goto reclaim;
+	}
 
 	/*
 	 * Never flush out dirty data during non-blocking reclaim, as it would
@@ -1030,25 +1032,24 @@ restart:
 		xfs_buf_relse(bp);
 	}
 
-	xfs_iflock(ip);
 reclaim:
+	ASSERT(!xfs_isiflocked(ip));
+
 	/*
 	 * Because we use RCU freeing we need to ensure the inode always appears
 	 * to be reclaimed with an invalid inode number when in the free state.
-	 * We do this as early as possible under the ILOCK and flush lock so
-	 * that xfs_iflush_cluster() can be guaranteed to detect races with us
-	 * here. By doing this, we guarantee that once xfs_iflush_cluster has
-	 * locked both the XFS_ILOCK and the flush lock that it will see either
-	 * a valid, flushable inode that will serialise correctly against the
-	 * locks below, or it will see a clean (and invalid) inode that it can
-	 * skip.
+	 * We do this as early as possible under the ILOCK so that
+	 * xfs_iflush_cluster() can be guaranteed to detect races with us here.
+	 * By doing this, we guarantee that once xfs_iflush_cluster has locked
+	 * XFS_ILOCK that it will see either a valid, flushable inode that will
+	 * serialise correctly, or it will see a clean (and invalid) inode that
+	 * it can skip.
 	 */
 	spin_lock(&ip->i_flags_lock);
 	ip->i_flags = XFS_IRECLAIM;
 	ip->i_ino = 0;
 	spin_unlock(&ip->i_flags_lock);
 
-	xfs_ifunlock(ip);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
 	XFS_STATS_INC(ip->i_mount, xs_ig_reclaims);
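The net effect in xfs_reclaim_inode() is a single invariant: every
branch into the reclaim: label must arrive with the flush lock already
released, whether via xfs_iflush_abort() on shutdown, via the explicit
xfs_ifunlock() in the stale/clean case, or via I/O completion after the
synchronous flush. A stand-alone model of that invariant (hypothetical
user-space code, with assert() standing in for the kernel's ASSERT):

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

static bool iflocked;			/* models the XFS_IFLOCK bit */

static void iflock(void)
{
	assert(!iflocked);
	iflocked = true;
}

static void ifunlock(void)
{
	assert(iflocked);		/* mirrors the check added to xfs_ifunlock() */
	iflocked = false;
}

/* The three ways a caller now reaches the reclaim logic, each having
 * cycled the flush lock exactly once beforehand. */
static void path_shutdown(void)
{
	iflock();
	ifunlock();			/* via xfs_iflush_abort() */
}

static void path_stale_or_clean(void)
{
	iflock();
	ifunlock();			/* the explicit xfs_ifunlock() */
}

static void path_flushed(void)
{
	iflock();
	ifunlock();			/* dropped at flush I/O completion */
}

static void reclaim(void)
{
	assert(!iflocked);		/* the check now at the reclaim: label */
}

int main(void)
{
	path_shutdown();
	reclaim();
	path_stale_or_clean();
	reclaim();
	path_flushed();
	reclaim();
	puts("every path reaches reclaim with the flush lock released");
	return 0;
}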
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index f14c1de2549d..71e8a81c91a3 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -246,6 +246,11 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
  * Synchronize processes attempting to flush the in-core inode back to disk.
  */
 
+static inline int xfs_isiflocked(struct xfs_inode *ip)
+{
+	return xfs_iflags_test(ip, XFS_IFLOCK);
+}
+
 extern void __xfs_iflock(struct xfs_inode *ip);
 
 static inline int xfs_iflock_nowait(struct xfs_inode *ip)
@@ -261,16 +266,12 @@ static inline void xfs_iflock(struct xfs_inode *ip)
 
 static inline void xfs_ifunlock(struct xfs_inode *ip)
 {
+	ASSERT(xfs_isiflocked(ip));
 	xfs_iflags_clear(ip, XFS_IFLOCK);
 	smp_mb();
 	wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
 }
 
-static inline int xfs_isiflocked(struct xfs_inode *ip)
-{
-	return xfs_iflags_test(ip, XFS_IFLOCK);
-}
-
 /*
  * Flags for inode locking.
  * Bit ranges:	1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield)
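Note that the xfs_isiflocked() helper moves above xfs_ifunlock() simply
so the new ASSERT can call it without a forward declaration. A tiny
user-space approximation (hypothetical code, standard assert() in place
of the kernel's DEBUG-only ASSERT) of what the hardened unlock now
catches:

#include <assert.h>
#include <stdbool.h>

static bool iflock_bit;

static bool isiflocked(void)
{
	return iflock_bit;
}

static void ifunlock(void)
{
	assert(isiflocked());		/* fires on an unbalanced unlock */
	iflock_bit = false;
}

int main(void)
{
	iflock_bit = true;		/* take the lock */
	ifunlock();			/* balanced unlock: fine */
	ifunlock();			/* second unlock: aborts here */
	return 0;
}

With the assert in place, an unbalanced unlock is caught at its source
on DEBUG builds rather than surfacing later as a hung or doubly-granted
flush lock.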