author	Brian Foster <bfoster@redhat.com>	2015-10-12 00:34:20 -0400
committer	Dave Chinner <david@fromorbit.com>	2015-10-12 00:34:20 -0400
commit	009c6e871e98aa23bc2e58474c3d9feb05dd1ae6 (patch)
tree	5af6b1c62320e3643bfbc895f9ee9fbe93103ba4
parent	5cb13dcd0fac071b45c4bebe1801a08ff0d89cad (diff)
xfs: add missing ilock around dio write last extent alignment
The iomap codepath (via get_blocks()) acquires and releases the inode lock in the case of a direct write that requires block allocation. This is because xfs_iomap_write_direct() allocates a transaction, which means the ilock must be dropped and reacquired after the transaction is allocated and reserved.

xfs_iomap_write_direct() invokes xfs_iomap_eof_align_last_fsb() before the transaction is created and thus before the ilock is reacquired. This can lead to calls to xfs_iread_extents() and reads of the in-core extent list without any synchronization (via xfs_bmap_eof() and xfs_bmap_last_extent()). xfs_iread_extents() assert fails if the ilock is not held, but this is not currently seen in practice as the current callers had already invoked xfs_bmapi_read().

What has been seen in practice are reports of crashes down in the xfs_bmap_eof() codepath on direct writes due to seemingly bogus pointer references from xfs_iext_get_ext(). While an explicit reproducer is not currently available to confirm the cause of the problem, crash analysis and code inspection from David Jeffery identified the insufficient locking.

xfs_iomap_eof_align_last_fsb() is called from other contexts with the inode lock already held, so we cannot acquire it therein. __xfs_get_blocks() acquires and drops the ilock with variable flags to cover the case where the extent list must be read in. The common case is that __xfs_get_blocks() acquires the shared ilock. To provide locking around the last extent alignment call without adding more lock cycles to the dio path, update xfs_iomap_write_direct() to expect the shared ilock held on entry and do the extent alignment under its protection. Demote the lock, if necessary, from __xfs_get_blocks() and push the xfs_qm_dqattach() call outside of the shared lock critical section. Also, add an assert to document that the extent list is always expected to be present in this path. Otherwise, we risk a call to xfs_iread_extents() while under the shared ilock. This is safe as all current callers have executed an xfs_bmapi_read() call under the current iolock context.

Reported-by: David Jeffery <djeffery@redhat.com>
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
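To make the locking handoff described above concrete, the following is a minimal, self-contained sketch of the contract this patch establishes, not the kernel code itself: the types and helpers (sketch_inode, ilock(), iunlock(), ilock_demote(), write_direct()) are illustrative stand-ins for the real XFS inode and xfs_ilock()/xfs_iunlock()/xfs_ilock_demote()/xfs_iomap_write_direct(). The caller enters with the ilock held shared (demoting an exclusive lock if it happens to hold one), the callee does the EOF alignment under that shared lock, drops it for dqattach and transaction setup, retakes it exclusively for the allocation, and leaves the inode unlocked on return.

/* A minimal sketch of the locking contract; stand-ins, not kernel code. */
#include <assert.h>
#include <stdio.h>

enum ilock_mode { ILOCK_NONE, ILOCK_SHARED, ILOCK_EXCL };

struct sketch_inode {
	enum ilock_mode ilock;		/* models the inode lock state */
};

static void ilock(struct sketch_inode *ip, enum ilock_mode mode)
{
	ip->ilock = mode;
}

static void iunlock(struct sketch_inode *ip)
{
	ip->ilock = ILOCK_NONE;
}

static void ilock_demote(struct sketch_inode *ip)
{
	assert(ip->ilock == ILOCK_EXCL);
	ip->ilock = ILOCK_SHARED;	/* models xfs_ilock_demote() */
}

/*
 * Models the new xfs_iomap_write_direct() contract: entered with the
 * shared lock held, the last extent alignment runs under that shared
 * lock, the lock is then dropped for dqattach/transaction setup,
 * retaken exclusively for the allocation, and released before return.
 */
static int write_direct(struct sketch_inode *ip)
{
	assert(ip->ilock == ILOCK_SHARED);	/* locked by caller */

	/* ... last extent alignment under the shared lock ... */

	iunlock(ip);			/* drop before dquot attach */
	/* ... dquot attach, transaction alloc/reserve ... */

	ilock(ip, ILOCK_EXCL);		/* retake for the allocation */
	/* ... block allocation and transaction commit ... */

	iunlock(ip);			/* unlocked on return */
	return 0;
}

int main(void)
{
	struct sketch_inode ip = { .ilock = ILOCK_EXCL };

	/* Caller side, mirroring __xfs_get_blocks(): demote, don't unlock. */
	if (ip.ilock == ILOCK_EXCL)
		ilock_demote(&ip);

	printf("write_direct() returned %d\n", write_direct(&ip));
	return 0;
}

The key point the sketch illustrates is that the caller no longer cycles the lock around the call: it hands a shared lock across the function boundary, which keeps the in-core extent list protected during alignment without adding extra lock round trips to the dio path.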
-rw-r--r--	fs/xfs/xfs_aops.c	10
-rw-r--r--	fs/xfs/xfs_iomap.c	33
-rw-r--r--	fs/xfs/xfs_pnfs.c	5
3 files changed, 36 insertions(+), 12 deletions(-)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index e485e31813fa..e4fff5898c1c 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1408,12 +1408,12 @@ __xfs_get_blocks(
 	      imap.br_startblock == DELAYSTARTBLOCK))) {
 		if (direct || xfs_get_extsz_hint(ip)) {
 			/*
-			 * Drop the ilock in preparation for starting the block
-			 * allocation transaction.  It will be retaken
-			 * exclusively inside xfs_iomap_write_direct for the
-			 * actual allocation.
+			 * xfs_iomap_write_direct() expects the shared lock. It
+			 * is unlocked on return.
 			 */
-			xfs_iunlock(ip, lockmode);
+			if (lockmode == XFS_ILOCK_EXCL)
+				xfs_ilock_demote(ip, lockmode);
+
 			error = xfs_iomap_write_direct(ip, offset, size,
 						       &imap, nimaps);
 			if (error)
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 1f86033171c8..1beda331d8a7 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -131,20 +131,29 @@ xfs_iomap_write_direct(
 	uint		qblocks, resblks, resrtextents;
 	int		committed;
 	int		error;
-
-	error = xfs_qm_dqattach(ip, 0);
-	if (error)
-		return error;
+	int		lockmode;
 
 	rt = XFS_IS_REALTIME_INODE(ip);
 	extsz = xfs_get_extsz_hint(ip);
+	lockmode = XFS_ILOCK_SHARED;	/* locked by caller */
+
+	ASSERT(xfs_isilocked(ip, lockmode));
 
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
 	if ((offset + count) > XFS_ISIZE(ip)) {
+		/*
+		 * Assert that the in-core extent list is present since this
+		 * can call xfs_iread_extents() and we only have the ilock
+		 * shared. This should be safe because the lock was held
+		 * around a bmapi call in the caller and we only need it to
+		 * access the in-core list.
+		 */
+		ASSERT(XFS_IFORK_PTR(ip, XFS_DATA_FORK)->if_flags &
+								XFS_IFEXTENTS);
 		error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
 		if (error)
-			return error;
+			goto out_unlock;
 	} else {
 		if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
 			last_fsb = MIN(last_fsb, (xfs_fileoff_t)
@@ -174,6 +183,15 @@ xfs_iomap_write_direct(
 	}
 
 	/*
+	 * Drop the shared lock acquired by the caller, attach the dquot if
+	 * necessary and move on to transaction setup.
+	 */
+	xfs_iunlock(ip, lockmode);
+	error = xfs_qm_dqattach(ip, 0);
+	if (error)
+		return error;
+
+	/*
 	 * Allocate and setup the transaction
 	 */
 	tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
@@ -187,7 +205,8 @@ xfs_iomap_write_direct(
 		return error;
 	}
 
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	lockmode = XFS_ILOCK_EXCL;
+	xfs_ilock(ip, lockmode);
 
 	error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
 	if (error)
@@ -229,7 +248,7 @@ xfs_iomap_write_direct(
 		error = xfs_alert_fsblock_zero(ip, imap);
 
 out_unlock:
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	xfs_iunlock(ip, lockmode);
 	return error;
 
 out_bmap_cancel:
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index ab4a6066f7ca..dc6221942b85 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -181,6 +181,11 @@ xfs_fs_map_blocks(
 	ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
 
 	if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) {
+		/*
+		 * xfs_iomap_write_direct() expects to take ownership of
+		 * the shared ilock.
+		 */
+		xfs_ilock(ip, XFS_ILOCK_SHARED);
 		error = xfs_iomap_write_direct(ip, offset, length,
 					       &imap, nimaps);
 		if (error)