aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2016-01-11 15:03:44 -0500
committerDave Chinner <david@fromorbit.com>2016-01-11 15:03:44 -0500
commitb79f4a1c68bb99152d0785ee4ea3ab4396cdacc6 (patch)
tree0b213b3df19cf8b6fa80a4d01d1aa726c937b2b4 /fs/xfs
parentf6106efae5f4144b32f6c10de0dc3e7efc9181e3 (diff)
xfs: inode recovery readahead can race with inode buffer creation
When we do inode readahead in log recovery, we do can do the readahead before we've replayed the icreate transaction that stamps the buffer with inode cores. The inode readahead verifier catches this and marks the buffer as !done to indicate that it doesn't yet contain valid inodes. In adding buffer error notification (i.e. setting b_error = -EIO at the same time as as we clear the done flag) to such a readahead verifier failure, we can then get subsequent inode recovery failing with this error: XFS (dm-0): metadata I/O error: block 0xa00060 ("xlog_recover_do..(read#2)") error 5 numblks 32 This occurs when readahead completion races with icreate item replay such as: inode readahead find buffer lock buffer submit RA io .... icreate recovery xfs_trans_get_buffer find buffer lock buffer <blocks on RA completion> ..... <ra completion> fails verifier clear XBF_DONE set bp->b_error = -EIO release and unlock buffer <icreate gains lock> icreate initialises buffer marks buffer as done adds buffer to delayed write queue releases buffer At this point, we have an initialised inode buffer that is up to date but has an -EIO state registered against it. When we finally get to recovering an inode in that buffer: inode item recovery xfs_trans_read_buffer find buffer lock buffer sees XBF_DONE is set, returns buffer sees bp->b_error is set fail log recovery! Essentially, we need xfs_trans_get_buf_map() to clear the error status of the buffer when doing a lookup. This function returns uninitialised buffers, so the buffer returned can not be in an error state and none of the code that uses this function expects b_error to be set on return. Indeed, there is an ASSERT(!bp->b_error); in the transaction case in xfs_trans_get_buf_map() that would have caught this if log recovery used transactions.... This patch firstly changes the inode readahead failure to set -EIO on the buffer, and secondly changes xfs_buf_get_map() to never return a buffer with an error state set so this first change doesn't cause unexpected log recovery failures. cc: <stable@vger.kernel.org> # 3.12 - current Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c12
-rw-r--r--fs/xfs/xfs_buf.c7
2 files changed, 14 insertions, 5 deletions
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 1b8d98a915c4..ff17c48e7ed3 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -62,11 +62,12 @@ xfs_inobp_check(
62 * has not had the inode cores stamped into it. Hence for readahead, the buffer 62 * has not had the inode cores stamped into it. Hence for readahead, the buffer
63 * may be potentially invalid. 63 * may be potentially invalid.
64 * 64 *
65 * If the readahead buffer is invalid, we don't want to mark it with an error, 65 * If the readahead buffer is invalid, we need to mark it with an error and
66 * but we do want to clear the DONE status of the buffer so that a followup read 66 * clear the DONE status of the buffer so that a followup read will re-read it
67 * will re-read it from disk. This will ensure that we don't get an unnecessary 67 * from disk. We don't report the error otherwise to avoid warnings during log
68 * warnings during log recovery and we don't get unnecssary panics on debug 68 * recovery and we don't get unnecssary panics on debug kernels. We use EIO here
69 * kernels. 69 * because all we want to do is say readahead failed; there is no-one to report
70 * the error to, so this will distinguish it from a non-ra verifier failure.
70 */ 71 */
71static void 72static void
72xfs_inode_buf_verify( 73xfs_inode_buf_verify(
@@ -93,6 +94,7 @@ xfs_inode_buf_verify(
93 XFS_RANDOM_ITOBP_INOTOBP))) { 94 XFS_RANDOM_ITOBP_INOTOBP))) {
94 if (readahead) { 95 if (readahead) {
95 bp->b_flags &= ~XBF_DONE; 96 bp->b_flags &= ~XBF_DONE;
97 xfs_buf_ioerror(bp, -EIO);
96 return; 98 return;
97 } 99 }
98 100
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 45a8ea7cfdb2..ae86b16f9025 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -604,6 +604,13 @@ found:
604 } 604 }
605 } 605 }
606 606
607 /*
608 * Clear b_error if this is a lookup from a caller that doesn't expect
609 * valid data to be found in the buffer.
610 */
611 if (!(flags & XBF_READ))
612 xfs_buf_ioerror(bp, 0);
613
607 XFS_STATS_INC(target->bt_mount, xb_get); 614 XFS_STATS_INC(target->bt_mount, xb_get);
608 trace_xfs_buf_get(bp, flags, _RET_IP_); 615 trace_xfs_buf_get(bp, flags, _RET_IP_);
609 return bp; 616 return bp;