diff options
author | Brian Foster <bfoster@redhat.com> | 2017-05-31 11:22:52 -0400 |
---|---|---|
committer | Darrick J. Wong <darrick.wong@oracle.com> | 2017-05-31 11:22:52 -0400 |
commit | 63db7c815bc0997c29e484d2409684fdd9fcd93b (patch) | |
tree | 132764569fad16360e83d774481c35678db5bd0a /fs/xfs/xfs_buf.c | |
parent | a54fba8f5a0dc36161cacdf2aa90f007f702ec1a (diff) |
xfs: use ->b_state to fix buffer I/O accounting release race
We've had user reports of unmount hangs in xfs_wait_buftarg() that
analysis shows are due to btp->bt_io_count == -1. bt_io_count
represents the count of in-flight asynchronous buffers and thus
should always be >= 0. xfs_wait_buftarg() waits for this value to
stabilize to zero in order to ensure that all untracked (with
respect to the lru) buffers have completed I/O processing before
unmount proceeds to tear down in-core data structures.
The value of -1 implies an I/O accounting decrement race. Indeed,
the fact that xfs_buf_ioacct_dec() is called from xfs_buf_rele()
(where the buffer lock is no longer held) means that bp->b_flags can
be updated from an unsafe context. While a user-level reproducer is
currently not available, some intrusive hacks to run racing buffer
lookups/ioacct/releases from multiple threads were used to
successfully manufacture this problem.
Existing callers do not expect to acquire the buffer lock from
xfs_buf_rele(). Therefore, we cannot safely update ->b_flags from
this context. It turns out that we already have separate buffer
state bits and associated serialization for dealing with buffer LRU
state in the form of ->b_state and ->b_lock. Therefore, replace the
_XBF_IN_FLIGHT flag with a ->b_state variant, update the I/O
accounting wrappers appropriately and make sure they are used with
the correct locking. This ensures that buffer in-flight state can be
modified at buffer release time without racing with modifications
from a buffer lock holder.
Fixes: 9c7504aa72b6 ("xfs: track and serialize in-flight async buffers against unmount")
Cc: <stable@vger.kernel.org> # v4.8+
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Tested-by: Libor Pechacek <lpechacek@suse.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Diffstat (limited to 'fs/xfs/xfs_buf.c')
-rw-r--r-- | fs/xfs/xfs_buf.c | 38 |
1 files changed, 26 insertions, 12 deletions
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 62fa39276a24..07b77b73b024 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
@@ -97,12 +97,16 @@ static inline void | |||
97 | xfs_buf_ioacct_inc( | 97 | xfs_buf_ioacct_inc( |
98 | struct xfs_buf *bp) | 98 | struct xfs_buf *bp) |
99 | { | 99 | { |
100 | if (bp->b_flags & (XBF_NO_IOACCT|_XBF_IN_FLIGHT)) | 100 | if (bp->b_flags & XBF_NO_IOACCT) |
101 | return; | 101 | return; |
102 | 102 | ||
103 | ASSERT(bp->b_flags & XBF_ASYNC); | 103 | ASSERT(bp->b_flags & XBF_ASYNC); |
104 | bp->b_flags |= _XBF_IN_FLIGHT; | 104 | spin_lock(&bp->b_lock); |
105 | percpu_counter_inc(&bp->b_target->bt_io_count); | 105 | if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) { |
106 | bp->b_state |= XFS_BSTATE_IN_FLIGHT; | ||
107 | percpu_counter_inc(&bp->b_target->bt_io_count); | ||
108 | } | ||
109 | spin_unlock(&bp->b_lock); | ||
106 | } | 110 | } |
107 | 111 | ||
108 | /* | 112 | /* |
@@ -110,14 +114,24 @@ xfs_buf_ioacct_inc( | |||
110 | * freed and unaccount from the buftarg. | 114 | * freed and unaccount from the buftarg. |
111 | */ | 115 | */ |
112 | static inline void | 116 | static inline void |
113 | xfs_buf_ioacct_dec( | 117 | __xfs_buf_ioacct_dec( |
114 | struct xfs_buf *bp) | 118 | struct xfs_buf *bp) |
115 | { | 119 | { |
116 | if (!(bp->b_flags & _XBF_IN_FLIGHT)) | 120 | ASSERT(spin_is_locked(&bp->b_lock)); |
117 | return; | ||
118 | 121 | ||
119 | bp->b_flags &= ~_XBF_IN_FLIGHT; | 122 | if (bp->b_state & XFS_BSTATE_IN_FLIGHT) { |
120 | percpu_counter_dec(&bp->b_target->bt_io_count); | 123 | bp->b_state &= ~XFS_BSTATE_IN_FLIGHT; |
124 | percpu_counter_dec(&bp->b_target->bt_io_count); | ||
125 | } | ||
126 | } | ||
127 | |||
128 | static inline void | ||
129 | xfs_buf_ioacct_dec( | ||
130 | struct xfs_buf *bp) | ||
131 | { | ||
132 | spin_lock(&bp->b_lock); | ||
133 | __xfs_buf_ioacct_dec(bp); | ||
134 | spin_unlock(&bp->b_lock); | ||
121 | } | 135 | } |
122 | 136 | ||
123 | /* | 137 | /* |
@@ -149,9 +163,9 @@ xfs_buf_stale( | |||
149 | * unaccounted (released to LRU) before that occurs. Drop in-flight | 163 | * unaccounted (released to LRU) before that occurs. Drop in-flight |
150 | * status now to preserve accounting consistency. | 164 | * status now to preserve accounting consistency. |
151 | */ | 165 | */ |
152 | xfs_buf_ioacct_dec(bp); | ||
153 | |||
154 | spin_lock(&bp->b_lock); | 166 | spin_lock(&bp->b_lock); |
167 | __xfs_buf_ioacct_dec(bp); | ||
168 | |||
155 | atomic_set(&bp->b_lru_ref, 0); | 169 | atomic_set(&bp->b_lru_ref, 0); |
156 | if (!(bp->b_state & XFS_BSTATE_DISPOSE) && | 170 | if (!(bp->b_state & XFS_BSTATE_DISPOSE) && |
157 | (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru))) | 171 | (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru))) |
@@ -979,12 +993,12 @@ xfs_buf_rele( | |||
979 | * ensures the decrement occurs only once per-buf. | 993 | * ensures the decrement occurs only once per-buf. |
980 | */ | 994 | */ |
981 | if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru)) | 995 | if ((atomic_read(&bp->b_hold) == 1) && !list_empty(&bp->b_lru)) |
982 | xfs_buf_ioacct_dec(bp); | 996 | __xfs_buf_ioacct_dec(bp); |
983 | goto out_unlock; | 997 | goto out_unlock; |
984 | } | 998 | } |
985 | 999 | ||
986 | /* the last reference has been dropped ... */ | 1000 | /* the last reference has been dropped ... */ |
987 | xfs_buf_ioacct_dec(bp); | 1001 | __xfs_buf_ioacct_dec(bp); |
988 | if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { | 1002 | if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { |
989 | /* | 1003 | /* |
990 | * If the buffer is added to the LRU take a new reference to the | 1004 | * If the buffer is added to the LRU take a new reference to the |