aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- fs/xfs/xfs_log_recover.c 169
1 files changed, 156 insertions, 13 deletions
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 006ee288246d..dc100fed1973 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1959,6 +1959,104 @@ xlog_recover_do_inode_buffer(
1959} 1959}
1960 1960
1961/* 1961/*
1962 * V5 filesystems know the age of the buffer on disk being recovered. We can
1963 * have newer objects on disk than we are replaying, and so for these cases we
1964 * don't want to replay the current change as that will make the buffer contents
1965 * temporarily invalid on disk.
1966 *
1967 * The magic number might not match the buffer type we are going to recover
1968 * (e.g. reallocated blocks), so we ignore the xfs_buf_log_format flags. Hence
1969 * extract the LSN of the existing object in the buffer based on its current
1970 * magic number. If we don't recognise the magic number in the buffer, then
1971 * return an LSN of -1 so that the caller knows it was an unrecognised block and
1972 * so can recover the buffer.
1973 */
1974static xfs_lsn_t
1975xlog_recover_get_buf_lsn(
1976 struct xfs_mount *mp,
1977 struct xfs_buf *bp)
1978{
1979 __uint32_t magic32;
1980 __uint16_t magic16;
1981 __uint16_t magicda;
1982 void *blk = bp->b_addr;
1983
1984 /* v4 filesystems always recover immediately */
1985 if (!xfs_sb_version_hascrc(&mp->m_sb))
1986 goto recover_immediately;
1987
1988 magic32 = be32_to_cpu(*(__be32 *)blk);
1989 switch (magic32) {
1990 case XFS_ABTB_CRC_MAGIC:
1991 case XFS_ABTC_CRC_MAGIC:
1992 case XFS_ABTB_MAGIC:
1993 case XFS_ABTC_MAGIC:
1994 case XFS_IBT_CRC_MAGIC:
1995 case XFS_IBT_MAGIC:
1996 return be64_to_cpu(
1997 ((struct xfs_btree_block *)blk)->bb_u.s.bb_lsn);
1998 case XFS_BMAP_CRC_MAGIC:
1999 case XFS_BMAP_MAGIC:
2000 return be64_to_cpu(
2001 ((struct xfs_btree_block *)blk)->bb_u.l.bb_lsn);
2002 case XFS_AGF_MAGIC:
2003 return be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn);
2004 case XFS_AGFL_MAGIC:
2005 return be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn);
2006 case XFS_AGI_MAGIC:
2007 return be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn);
2008 case XFS_SYMLINK_MAGIC:
2009 return be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn);
2010 case XFS_DIR3_BLOCK_MAGIC:
2011 case XFS_DIR3_DATA_MAGIC:
2012 case XFS_DIR3_FREE_MAGIC:
2013 return be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn);
2014 case XFS_ATTR3_RMT_MAGIC:
2015 return be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn);
2016 case XFS_SB_MAGIC:
2017 return be64_to_cpu(((struct xfs_sb *)blk)->sb_lsn);
2018 default:
2019 break;
2020 }
2021
2022 magicda = be16_to_cpu(((struct xfs_da_blkinfo *)blk)->magic);
2023 switch (magicda) {
2024 case XFS_DIR3_LEAF1_MAGIC:
2025 case XFS_DIR3_LEAFN_MAGIC:
2026 case XFS_DA3_NODE_MAGIC:
2027 return be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
2028 default:
2029 break;
2030 }
2031
2032 /*
2033 * We do individual object checks on dquot and inode buffers as they
2034 * have their own individual LSN records. Also, we could have a stale
2035 * buffer here, so we have to at least recognise these buffer types.
2036 *
2037 * A noted complexity here is inode unlinked list processing - it logs
2038 * the inode directly in the buffer, but we don't know which inodes have
2039 * been modified, and there is no global buffer LSN. Hence we need to
2040 * recover all inode buffer types immediately. This problem will be
2041 * fixed by logical logging of the unlinked list modifications.
2042 */
2043 magic16 = be16_to_cpu(*(__be16 *)blk);
2044 switch (magic16) {
2045 case XFS_DQUOT_MAGIC:
2046 case XFS_DINODE_MAGIC:
2047 goto recover_immediately;
2048 default:
2049 break;
2050 }
2051
2052 /* unknown buffer contents, recover immediately */
2053
2054recover_immediately:
2055 return (xfs_lsn_t)-1;
2056
2057}
2058
2059/*
1962 * Validate the recovered buffer is of the correct type and attach the 2060 * Validate the recovered buffer is of the correct type and attach the
1963 * appropriate buffer operations to them for writeback. Magic numbers are in a 2061 * appropriate buffer operations to them for writeback. Magic numbers are in a
1964 * few places: 2062 * few places:
@@ -1967,7 +2065,7 @@ xlog_recover_do_inode_buffer(
1967 * inside a struct xfs_da_blkinfo at the start of the buffer. 2065 * inside a struct xfs_da_blkinfo at the start of the buffer.
1968 */ 2066 */
1969static void 2067static void
1970xlog_recovery_validate_buf_type( 2068xlog_recover_validate_buf_type(
1971 struct xfs_mount *mp, 2069 struct xfs_mount *mp,
1972 struct xfs_buf *bp, 2070 struct xfs_buf *bp,
1973 xfs_buf_log_format_t *buf_f) 2071 xfs_buf_log_format_t *buf_f)
@@ -2246,7 +2344,7 @@ xlog_recover_do_reg_buffer(
2246 * just avoid the verification stage for non-crc filesystems 2344 * just avoid the verification stage for non-crc filesystems
2247 */ 2345 */
2248 if (xfs_sb_version_hascrc(&mp->m_sb)) 2346 if (xfs_sb_version_hascrc(&mp->m_sb))
2249 xlog_recovery_validate_buf_type(mp, bp, buf_f); 2347 xlog_recover_validate_buf_type(mp, bp, buf_f);
2250} 2348}
2251 2349
2252/* 2350/*
@@ -2444,13 +2542,15 @@ STATIC int
2444xlog_recover_buffer_pass2( 2542xlog_recover_buffer_pass2(
2445 struct xlog *log, 2543 struct xlog *log,
2446 struct list_head *buffer_list, 2544 struct list_head *buffer_list,
2447 struct xlog_recover_item *item) 2545 struct xlog_recover_item *item,
2546 xfs_lsn_t current_lsn)
2448{ 2547{
2449 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; 2548 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
2450 xfs_mount_t *mp = log->l_mp; 2549 xfs_mount_t *mp = log->l_mp;
2451 xfs_buf_t *bp; 2550 xfs_buf_t *bp;
2452 int error; 2551 int error;
2453 uint buf_flags; 2552 uint buf_flags;
2553 xfs_lsn_t lsn;
2454 2554
2455 /* 2555 /*
2456 * In this pass we only want to recover all the buffers which have 2556 * In this pass we only want to recover all the buffers which have
@@ -2475,10 +2575,17 @@ xlog_recover_buffer_pass2(
2475 error = bp->b_error; 2575 error = bp->b_error;
2476 if (error) { 2576 if (error) {
2477 xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)"); 2577 xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)");
2478 xfs_buf_relse(bp); 2578 goto out_release;
2479 return error;
2480 } 2579 }
2481 2580
2581 /*
2582 * recover the buffer only if we get an LSN from it and it's less than
2583 * the lsn of the transaction we are replaying.
2584 */
2585 lsn = xlog_recover_get_buf_lsn(mp, bp);
2586 if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0)
2587 goto out_release;
2588
2482 if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { 2589 if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
2483 error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); 2590 error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
2484 } else if (buf_f->blf_flags & 2591 } else if (buf_f->blf_flags &
@@ -2488,7 +2595,7 @@ xlog_recover_buffer_pass2(
2488 xlog_recover_do_reg_buffer(mp, item, bp, buf_f); 2595 xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
2489 } 2596 }
2490 if (error) 2597 if (error)
2491 return XFS_ERROR(error); 2598 goto out_release;
2492 2599
2493 /* 2600 /*
2494 * Perform delayed write on the buffer. Asynchronous writes will be 2601 * Perform delayed write on the buffer. Asynchronous writes will be
@@ -2517,6 +2624,7 @@ xlog_recover_buffer_pass2(
2517 xfs_buf_delwri_queue(bp, buffer_list); 2624 xfs_buf_delwri_queue(bp, buffer_list);
2518 } 2625 }
2519 2626
2627out_release:
2520 xfs_buf_relse(bp); 2628 xfs_buf_relse(bp);
2521 return error; 2629 return error;
2522} 2630}
@@ -2525,7 +2633,8 @@ STATIC int
2525xlog_recover_inode_pass2( 2633xlog_recover_inode_pass2(
2526 struct xlog *log, 2634 struct xlog *log,
2527 struct list_head *buffer_list, 2635 struct list_head *buffer_list,
2528 struct xlog_recover_item *item) 2636 struct xlog_recover_item *item,
2637 xfs_lsn_t current_lsn)
2529{ 2638{
2530 xfs_inode_log_format_t *in_f; 2639 xfs_inode_log_format_t *in_f;
2531 xfs_mount_t *mp = log->l_mp; 2640 xfs_mount_t *mp = log->l_mp;
@@ -2605,6 +2714,20 @@ xlog_recover_inode_pass2(
2605 } 2714 }
2606 2715
2607 /* 2716 /*
2717 * If the inode has an LSN in it, recover the inode only if it's less
2718 * than the lsn of the transaction we are replaying.
2719 */
2720 if (dip->di_version >= 3) {
2721 xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn);
2722
2723 if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
2724 trace_xfs_log_recover_inode_skip(log, in_f);
2725 error = 0;
2726 goto out_release;
2727 }
2728 }
2729
2730 /*
2608 * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes 2731 * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
2609 * are transactional and if ordering is necessary we can determine that 2732 * are transactional and if ordering is necessary we can determine that
2610 * more accurately by the LSN field in the V3 inode core. Don't trust 2733 * more accurately by the LSN field in the V3 inode core. Don't trust
@@ -2793,6 +2916,8 @@ write_inode_buffer:
2793 ASSERT(bp->b_target->bt_mount == mp); 2916 ASSERT(bp->b_target->bt_mount == mp);
2794 bp->b_iodone = xlog_recover_iodone; 2917 bp->b_iodone = xlog_recover_iodone;
2795 xfs_buf_delwri_queue(bp, buffer_list); 2918 xfs_buf_delwri_queue(bp, buffer_list);
2919
2920out_release:
2796 xfs_buf_relse(bp); 2921 xfs_buf_relse(bp);
2797error: 2922error:
2798 if (need_free) 2923 if (need_free)
@@ -2834,7 +2959,8 @@ STATIC int
2834xlog_recover_dquot_pass2( 2959xlog_recover_dquot_pass2(
2835 struct xlog *log, 2960 struct xlog *log,
2836 struct list_head *buffer_list, 2961 struct list_head *buffer_list,
2837 struct xlog_recover_item *item) 2962 struct xlog_recover_item *item,
2963 xfs_lsn_t current_lsn)
2838{ 2964{
2839 xfs_mount_t *mp = log->l_mp; 2965 xfs_mount_t *mp = log->l_mp;
2840 xfs_buf_t *bp; 2966 xfs_buf_t *bp;
@@ -2908,6 +3034,19 @@ xlog_recover_dquot_pass2(
2908 return XFS_ERROR(EIO); 3034 return XFS_ERROR(EIO);
2909 } 3035 }
2910 3036
3037 /*
3038 * If the dquot has an LSN in it, recover the dquot only if it's less
3039 * than the lsn of the transaction we are replaying.
3040 */
3041 if (xfs_sb_version_hascrc(&mp->m_sb)) {
3042 struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq;
3043 xfs_lsn_t lsn = be64_to_cpu(dqb->dd_lsn);
3044
3045 if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
3046 goto out_release;
3047 }
3048 }
3049
2911 memcpy(ddq, recddq, item->ri_buf[1].i_len); 3050 memcpy(ddq, recddq, item->ri_buf[1].i_len);
2912 if (xfs_sb_version_hascrc(&mp->m_sb)) { 3051 if (xfs_sb_version_hascrc(&mp->m_sb)) {
2913 xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk), 3052 xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
@@ -2918,9 +3057,10 @@ xlog_recover_dquot_pass2(
2918 ASSERT(bp->b_target->bt_mount == mp); 3057 ASSERT(bp->b_target->bt_mount == mp);
2919 bp->b_iodone = xlog_recover_iodone; 3058 bp->b_iodone = xlog_recover_iodone;
2920 xfs_buf_delwri_queue(bp, buffer_list); 3059 xfs_buf_delwri_queue(bp, buffer_list);
2921 xfs_buf_relse(bp);
2922 3060
2923 return (0); 3061out_release:
3062 xfs_buf_relse(bp);
3063 return 0;
2924} 3064}
2925 3065
2926/* 3066/*
@@ -3267,15 +3407,18 @@ xlog_recover_commit_pass2(
3267 3407
3268 switch (ITEM_TYPE(item)) { 3408 switch (ITEM_TYPE(item)) {
3269 case XFS_LI_BUF: 3409 case XFS_LI_BUF:
3270 return xlog_recover_buffer_pass2(log, buffer_list, item); 3410 return xlog_recover_buffer_pass2(log, buffer_list, item,
3411 trans->r_lsn);
3271 case XFS_LI_INODE: 3412 case XFS_LI_INODE:
3272 return xlog_recover_inode_pass2(log, buffer_list, item); 3413 return xlog_recover_inode_pass2(log, buffer_list, item,
3414 trans->r_lsn);
3273 case XFS_LI_EFI: 3415 case XFS_LI_EFI:
3274 return xlog_recover_efi_pass2(log, item, trans->r_lsn); 3416 return xlog_recover_efi_pass2(log, item, trans->r_lsn);
3275 case XFS_LI_EFD: 3417 case XFS_LI_EFD:
3276 return xlog_recover_efd_pass2(log, item); 3418 return xlog_recover_efd_pass2(log, item);
3277 case XFS_LI_DQUOT: 3419 case XFS_LI_DQUOT:
3278 return xlog_recover_dquot_pass2(log, buffer_list, item); 3420 return xlog_recover_dquot_pass2(log, buffer_list, item,
3421 trans->r_lsn);
3279 case XFS_LI_ICREATE: 3422 case XFS_LI_ICREATE:
3280 return xlog_recover_do_icreate_pass2(log, buffer_list, item); 3423 return xlog_recover_do_icreate_pass2(log, buffer_list, item);
3281 case XFS_LI_QUOTAOFF: 3424 case XFS_LI_QUOTAOFF: