aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2014-08-03 22:43:06 -0400
committerDave Chinner <david@fromorbit.com>2014-08-03 22:43:06 -0400
commit67dc288c21064b31a98a53dc64f6b9714b819fd6 (patch)
tree10f0325148f4cee03ae8151fb026183f449c88fa /fs/xfs
parent400b9d88757c0bfbdfa97014e090ec40a31c1282 (diff)
xfs: ensure verifiers are attached to recovered buffers
Crash testing of CRC enabled filesystems has resulted in a number of reports of bad CRCs being detected after the filesystem was mounted. Errors such as the following were being seen: XFS (sdb3): Mounting V5 Filesystem XFS (sdb3): Starting recovery (logdev: internal) XFS (sdb3): Metadata CRC error detected at xfs_agf_read_verify+0x5a/0x100 [xfs], block 0x1 XFS (sdb3): Unmount and run xfs_repair XFS (sdb3): First 64 bytes of corrupted metadata buffer: ffff880136ffd600: 58 41 47 46 00 00 00 01 00 00 00 00 00 0f aa 40 XAGF...........@ ffff880136ffd610: 00 02 6d 53 00 02 77 f8 00 00 00 00 00 00 00 01 ..mS..w......... ffff880136ffd620: 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 03 ................ ffff880136ffd630: 00 00 00 04 00 08 81 d0 00 08 81 a7 00 00 00 00 ................ XFS (sdb3): metadata I/O error: block 0x1 ("xfs_trans_read_buf_map") error 74 numblks 1 The errors were typically being seen in AGF, AGI and their related btree block buffers some time after log recovery had run. Often it wasn't until later subsequent mounts that the problem was discovered. The common symptom was a buffer with the correct contents, but a CRC and an LSN that matched an older version of the contents. Some debug added to _xfs_buf_ioapply() indicated that buffers were being written without verifiers attached to them from log recovery, and Jan Kara isolated the cause to log recovery readahead and its interactions with buffers that had a more recent LSN on disk than the transaction being recovered. In this case, the buffer did not get a verifier attached, and so when the second phase of log recovery ran and recovered EFIs and unlinked inodes, the buffers were modified and written without the verifier running. Hence they had up to date contents, but stale LSNs and CRCs. Fix it by attaching verifiers to buffers we skip due to future LSN values so they don't escape into the buffer cache without the correct verifier attached. This patch is based on analysis and a patch from Jan Kara. 
cc: <stable@vger.kernel.org> Reported-by: Jan Kara <jack@suse.cz> Reported-by: Fanael Linithien <fanael4@gmail.com> Reported-by: Grozdan <neutrino8@gmail.com> Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/xfs_log_recover.c51
1 files changed, 31 insertions, 20 deletions
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index fbc2362d13e3..8a7d8a79a7be 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2126,6 +2126,17 @@ xlog_recover_validate_buf_type(
2126 __uint16_t magic16; 2126 __uint16_t magic16;
2127 __uint16_t magicda; 2127 __uint16_t magicda;
2128 2128
2129 /*
2130 * We can only do post recovery validation on items on CRC enabled
2131 * filesystems as we need to know when the buffer was written to be able
2132 * to determine if we should have replayed the item. If we replay old
2133 * metadata over a newer buffer, then it will enter a temporarily
2134 * inconsistent state resulting in verification failures. Hence for now
2135 * just avoid the verification stage for non-crc filesystems
2136 */
2137 if (!xfs_sb_version_hascrc(&mp->m_sb))
2138 return;
2139
2129 magic32 = be32_to_cpu(*(__be32 *)bp->b_addr); 2140 magic32 = be32_to_cpu(*(__be32 *)bp->b_addr);
2130 magic16 = be16_to_cpu(*(__be16*)bp->b_addr); 2141 magic16 = be16_to_cpu(*(__be16*)bp->b_addr);
2131 magicda = be16_to_cpu(info->magic); 2142 magicda = be16_to_cpu(info->magic);
@@ -2163,8 +2174,6 @@ xlog_recover_validate_buf_type(
2163 bp->b_ops = &xfs_agf_buf_ops; 2174 bp->b_ops = &xfs_agf_buf_ops;
2164 break; 2175 break;
2165 case XFS_BLFT_AGFL_BUF: 2176 case XFS_BLFT_AGFL_BUF:
2166 if (!xfs_sb_version_hascrc(&mp->m_sb))
2167 break;
2168 if (magic32 != XFS_AGFL_MAGIC) { 2177 if (magic32 != XFS_AGFL_MAGIC) {
2169 xfs_warn(mp, "Bad AGFL block magic!"); 2178 xfs_warn(mp, "Bad AGFL block magic!");
2170 ASSERT(0); 2179 ASSERT(0);
@@ -2197,10 +2206,6 @@ xlog_recover_validate_buf_type(
2197#endif 2206#endif
2198 break; 2207 break;
2199 case XFS_BLFT_DINO_BUF: 2208 case XFS_BLFT_DINO_BUF:
2200 /*
2201 * we get here with inode allocation buffers, not buffers that
2202 * track unlinked list changes.
2203 */
2204 if (magic16 != XFS_DINODE_MAGIC) { 2209 if (magic16 != XFS_DINODE_MAGIC) {
2205 xfs_warn(mp, "Bad INODE block magic!"); 2210 xfs_warn(mp, "Bad INODE block magic!");
2206 ASSERT(0); 2211 ASSERT(0);
@@ -2280,8 +2285,6 @@ xlog_recover_validate_buf_type(
2280 bp->b_ops = &xfs_attr3_leaf_buf_ops; 2285 bp->b_ops = &xfs_attr3_leaf_buf_ops;
2281 break; 2286 break;
2282 case XFS_BLFT_ATTR_RMT_BUF: 2287 case XFS_BLFT_ATTR_RMT_BUF:
2283 if (!xfs_sb_version_hascrc(&mp->m_sb))
2284 break;
2285 if (magic32 != XFS_ATTR3_RMT_MAGIC) { 2288 if (magic32 != XFS_ATTR3_RMT_MAGIC) {
2286 xfs_warn(mp, "Bad attr remote magic!"); 2289 xfs_warn(mp, "Bad attr remote magic!");
2287 ASSERT(0); 2290 ASSERT(0);
@@ -2388,16 +2391,7 @@ xlog_recover_do_reg_buffer(
2388 /* Shouldn't be any more regions */ 2391 /* Shouldn't be any more regions */
2389 ASSERT(i == item->ri_total); 2392 ASSERT(i == item->ri_total);
2390 2393
2391 /* 2394 xlog_recover_validate_buf_type(mp, bp, buf_f);
2392 * We can only do post recovery validation on items on CRC enabled
2393 * fielsystems as we need to know when the buffer was written to be able
2394 * to determine if we should have replayed the item. If we replay old
2395 * metadata over a newer buffer, then it will enter a temporarily
2396 * inconsistent state resulting in verification failures. Hence for now
2397 * just avoid the verification stage for non-crc filesystems
2398 */
2399 if (xfs_sb_version_hascrc(&mp->m_sb))
2400 xlog_recover_validate_buf_type(mp, bp, buf_f);
2401} 2395}
2402 2396
2403/* 2397/*
@@ -2505,12 +2499,29 @@ xlog_recover_buffer_pass2(
2505 } 2499 }
2506 2500
2507 /* 2501 /*
2508 * recover the buffer only if we get an LSN from it and it's less than 2502 * Recover the buffer only if we get an LSN from it and it's less than
2509 * the lsn of the transaction we are replaying. 2503 * the lsn of the transaction we are replaying.
2504 *
2505 * Note that we have to be extremely careful of readahead here.
2506 * Readahead does not attach verifiers to the buffers so if we don't
2507 * actually do any replay after readahead because the LSN we found
2508 * in the buffer is more recent than the current transaction then we
2509 * need to attach the verifier directly. Failure to do so means that
2510 * future recovery actions (e.g. EFI and unlinked list recovery) can
2511 * operate on the buffers and they won't get the verifier attached. This
2512 * can lead to blocks on disk having the correct content but a stale
2513 * CRC.
2514 *
2515 * It is safe to assume these clean buffers are currently up to date.
2516 * If the buffer is dirtied by a later transaction being replayed, then
2517 * the verifier will be reset to match whatever recovery turns that
2518 * buffer into.
2510 */ 2519 */
2511 lsn = xlog_recover_get_buf_lsn(mp, bp); 2520 lsn = xlog_recover_get_buf_lsn(mp, bp);
2512 if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) 2521 if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
2522 xlog_recover_validate_buf_type(mp, bp, buf_f);
2513 goto out_release; 2523 goto out_release;
2524 }
2514 2525
2515 if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { 2526 if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
2516 error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); 2527 error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);