diff options
Diffstat (limited to 'fs/xfs/xfs_log_recover.c')
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 569 |
1 files changed, 493 insertions, 76 deletions
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 7681b19aa5dc..39797490a1f1 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -17,7 +17,7 @@ | |||
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_fs.h" | 19 | #include "xfs_fs.h" |
20 | #include "xfs_types.h" | 20 | #include "xfs_format.h" |
21 | #include "xfs_bit.h" | 21 | #include "xfs_bit.h" |
22 | #include "xfs_log.h" | 22 | #include "xfs_log.h" |
23 | #include "xfs_inum.h" | 23 | #include "xfs_inum.h" |
@@ -41,7 +41,6 @@ | |||
41 | #include "xfs_extfree_item.h" | 41 | #include "xfs_extfree_item.h" |
42 | #include "xfs_trans_priv.h" | 42 | #include "xfs_trans_priv.h" |
43 | #include "xfs_quota.h" | 43 | #include "xfs_quota.h" |
44 | #include "xfs_utils.h" | ||
45 | #include "xfs_cksum.h" | 44 | #include "xfs_cksum.h" |
46 | #include "xfs_trace.h" | 45 | #include "xfs_trace.h" |
47 | #include "xfs_icache.h" | 46 | #include "xfs_icache.h" |
@@ -51,10 +50,12 @@ | |||
51 | #include "xfs_symlink.h" | 50 | #include "xfs_symlink.h" |
52 | #include "xfs_da_btree.h" | 51 | #include "xfs_da_btree.h" |
53 | #include "xfs_dir2_format.h" | 52 | #include "xfs_dir2_format.h" |
54 | #include "xfs_dir2_priv.h" | 53 | #include "xfs_dir2.h" |
55 | #include "xfs_attr_leaf.h" | 54 | #include "xfs_attr_leaf.h" |
56 | #include "xfs_attr_remote.h" | 55 | #include "xfs_attr_remote.h" |
57 | 56 | ||
57 | #define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1) | ||
58 | |||
58 | STATIC int | 59 | STATIC int |
59 | xlog_find_zeroed( | 60 | xlog_find_zeroed( |
60 | struct xlog *, | 61 | struct xlog *, |
@@ -607,7 +608,7 @@ out: | |||
607 | 608 | ||
608 | /* | 609 | /* |
609 | * Head is defined to be the point of the log where the next log write | 610 | * Head is defined to be the point of the log where the next log write |
610 | * write could go. This means that incomplete LR writes at the end are | 611 | * could go. This means that incomplete LR writes at the end are |
611 | * eliminated when calculating the head. We aren't guaranteed that previous | 612 | * eliminated when calculating the head. We aren't guaranteed that previous |
612 | * LR have complete transactions. We only know that a cycle number of | 613 | * LR have complete transactions. We only know that a cycle number of |
613 | * current cycle number -1 won't be present in the log if we start writing | 614 | * current cycle number -1 won't be present in the log if we start writing |
@@ -963,6 +964,7 @@ xlog_find_tail( | |||
963 | } | 964 | } |
964 | if (!found) { | 965 | if (!found) { |
965 | xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); | 966 | xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); |
967 | xlog_put_bp(bp); | ||
966 | ASSERT(0); | 968 | ASSERT(0); |
967 | return XFS_ERROR(EIO); | 969 | return XFS_ERROR(EIO); |
968 | } | 970 | } |
@@ -1144,7 +1146,8 @@ xlog_find_zeroed( | |||
1144 | */ | 1146 | */ |
1145 | xfs_warn(log->l_mp, | 1147 | xfs_warn(log->l_mp, |
1146 | "Log inconsistent or not a log (last==0, first!=1)"); | 1148 | "Log inconsistent or not a log (last==0, first!=1)"); |
1147 | return XFS_ERROR(EINVAL); | 1149 | error = XFS_ERROR(EINVAL); |
1150 | goto bp_err; | ||
1148 | } | 1151 | } |
1149 | 1152 | ||
1150 | /* we have a partially zeroed log */ | 1153 | /* we have a partially zeroed log */ |
@@ -1582,6 +1585,7 @@ xlog_recover_add_to_trans( | |||
1582 | "bad number of regions (%d) in inode log format", | 1585 | "bad number of regions (%d) in inode log format", |
1583 | in_f->ilf_size); | 1586 | in_f->ilf_size); |
1584 | ASSERT(0); | 1587 | ASSERT(0); |
1588 | kmem_free(ptr); | ||
1585 | return XFS_ERROR(EIO); | 1589 | return XFS_ERROR(EIO); |
1586 | } | 1590 | } |
1587 | 1591 | ||
@@ -1766,19 +1770,11 @@ xlog_recover_buffer_pass1( | |||
1766 | 1770 | ||
1767 | /* | 1771 | /* |
1768 | * Check to see whether the buffer being recovered has a corresponding | 1772 | * Check to see whether the buffer being recovered has a corresponding |
1769 | * entry in the buffer cancel record table. If it does then return 1 | 1773 | * entry in the buffer cancel record table. If it does, return the cancel |
1770 | * so that it will be cancelled, otherwise return 0. If the buffer is | 1774 | * buffer structure to the caller. |
1771 | * actually a buffer cancel item (XFS_BLF_CANCEL is set), then decrement | ||
1772 | * the refcount on the entry in the table and remove it from the table | ||
1773 | * if this is the last reference. | ||
1774 | * | ||
1775 | * We remove the cancel record from the table when we encounter its | ||
1776 | * last occurrence in the log so that if the same buffer is re-used | ||
1777 | * again after its last cancellation we actually replay the changes | ||
1778 | * made at that point. | ||
1779 | */ | 1775 | */ |
1780 | STATIC int | 1776 | STATIC struct xfs_buf_cancel * |
1781 | xlog_check_buffer_cancelled( | 1777 | xlog_peek_buffer_cancelled( |
1782 | struct xlog *log, | 1778 | struct xlog *log, |
1783 | xfs_daddr_t blkno, | 1779 | xfs_daddr_t blkno, |
1784 | uint len, | 1780 | uint len, |
@@ -1787,22 +1783,16 @@ xlog_check_buffer_cancelled( | |||
1787 | struct list_head *bucket; | 1783 | struct list_head *bucket; |
1788 | struct xfs_buf_cancel *bcp; | 1784 | struct xfs_buf_cancel *bcp; |
1789 | 1785 | ||
1790 | if (log->l_buf_cancel_table == NULL) { | 1786 | if (!log->l_buf_cancel_table) { |
1791 | /* | 1787 | /* empty table means no cancelled buffers in the log */ |
1792 | * There is nothing in the table built in pass one, | ||
1793 | * so this buffer must not be cancelled. | ||
1794 | */ | ||
1795 | ASSERT(!(flags & XFS_BLF_CANCEL)); | 1788 | ASSERT(!(flags & XFS_BLF_CANCEL)); |
1796 | return 0; | 1789 | return NULL; |
1797 | } | 1790 | } |
1798 | 1791 | ||
1799 | /* | ||
1800 | * Search for an entry in the cancel table that matches our buffer. | ||
1801 | */ | ||
1802 | bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno); | 1792 | bucket = XLOG_BUF_CANCEL_BUCKET(log, blkno); |
1803 | list_for_each_entry(bcp, bucket, bc_list) { | 1793 | list_for_each_entry(bcp, bucket, bc_list) { |
1804 | if (bcp->bc_blkno == blkno && bcp->bc_len == len) | 1794 | if (bcp->bc_blkno == blkno && bcp->bc_len == len) |
1805 | goto found; | 1795 | return bcp; |
1806 | } | 1796 | } |
1807 | 1797 | ||
1808 | /* | 1798 | /* |
@@ -1810,9 +1800,32 @@ xlog_check_buffer_cancelled( | |||
1810 | * that the buffer is NOT cancelled. | 1800 | * that the buffer is NOT cancelled. |
1811 | */ | 1801 | */ |
1812 | ASSERT(!(flags & XFS_BLF_CANCEL)); | 1802 | ASSERT(!(flags & XFS_BLF_CANCEL)); |
1813 | return 0; | 1803 | return NULL; |
1804 | } | ||
1805 | |||
1806 | /* | ||
1807 | * If the buffer is being cancelled then return 1 so that it will be cancelled, | ||
1808 | * otherwise return 0. If the buffer is actually a buffer cancel item | ||
1809 | * (XFS_BLF_CANCEL is set), then decrement the refcount on the entry in the | ||
1810 | * table and remove it from the table if this is the last reference. | ||
1811 | * | ||
1812 | * We remove the cancel record from the table when we encounter its last | ||
1813 | * occurrence in the log so that if the same buffer is re-used again after its | ||
1814 | * last cancellation we actually replay the changes made at that point. | ||
1815 | */ | ||
1816 | STATIC int | ||
1817 | xlog_check_buffer_cancelled( | ||
1818 | struct xlog *log, | ||
1819 | xfs_daddr_t blkno, | ||
1820 | uint len, | ||
1821 | ushort flags) | ||
1822 | { | ||
1823 | struct xfs_buf_cancel *bcp; | ||
1824 | |||
1825 | bcp = xlog_peek_buffer_cancelled(log, blkno, len, flags); | ||
1826 | if (!bcp) | ||
1827 | return 0; | ||
1814 | 1828 | ||
1815 | found: | ||
1816 | /* | 1829 | /* |
1817 | * We've got a match, so return 1 so that the recovery of this buffer | 1830 | * We've got a match, so return 1 so that the recovery of this buffer |
1818 | * is cancelled. If this buffer is actually a buffer cancel log | 1831 | * is cancelled. If this buffer is actually a buffer cancel log |
@@ -1947,6 +1960,149 @@ xlog_recover_do_inode_buffer( | |||
1947 | } | 1960 | } |
1948 | 1961 | ||
1949 | /* | 1962 | /* |
1963 | * V5 filesystems know the age of the buffer on disk being recovered. We can | ||
1964 | * have newer objects on disk than we are replaying, and so for these cases we | ||
1965 | * don't want to replay the current change as that will make the buffer contents | ||
1966 | * temporarily invalid on disk. | ||
1967 | * | ||
1968 | * The magic number might not match the buffer type we are going to recover | ||
1969 | * (e.g. reallocated blocks), so we ignore the xfs_buf_log_format flags. Hence | ||
1970 | * extract the LSN of the existing object in the buffer based on its current | ||
1971 | * magic number. If we don't recognise the magic number in the buffer, then | ||
1972 | * return a LSN of -1 so that the caller knows it was an unrecognised block and | ||
1973 | * so can recover the buffer. | ||
1974 | * | ||
1975 | * Note: we cannot rely solely on magic number matches to determine that the | ||
1976 | * buffer has a valid LSN - we also need to verify that it belongs to this | ||
1977 | * filesystem, so we need to extract the object's UUID and compare it to that | ||
1978 | * which we read from the superblock. If the UUIDs don't match, then we've got a | ||
1979 | * stale metadata block from an old filesystem instance that we need to recover | ||
1980 | * over the top of. | ||
1981 | */ | ||
1982 | static xfs_lsn_t | ||
1983 | xlog_recover_get_buf_lsn( | ||
1984 | struct xfs_mount *mp, | ||
1985 | struct xfs_buf *bp) | ||
1986 | { | ||
1987 | __uint32_t magic32; | ||
1988 | __uint16_t magic16; | ||
1989 | __uint16_t magicda; | ||
1990 | void *blk = bp->b_addr; | ||
1991 | uuid_t *uuid; | ||
1992 | xfs_lsn_t lsn = -1; | ||
1993 | |||
1994 | /* v4 filesystems always recover immediately */ | ||
1995 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | ||
1996 | goto recover_immediately; | ||
1997 | |||
1998 | magic32 = be32_to_cpu(*(__be32 *)blk); | ||
1999 | switch (magic32) { | ||
2000 | case XFS_ABTB_CRC_MAGIC: | ||
2001 | case XFS_ABTC_CRC_MAGIC: | ||
2002 | case XFS_ABTB_MAGIC: | ||
2003 | case XFS_ABTC_MAGIC: | ||
2004 | case XFS_IBT_CRC_MAGIC: | ||
2005 | case XFS_IBT_MAGIC: { | ||
2006 | struct xfs_btree_block *btb = blk; | ||
2007 | |||
2008 | lsn = be64_to_cpu(btb->bb_u.s.bb_lsn); | ||
2009 | uuid = &btb->bb_u.s.bb_uuid; | ||
2010 | break; | ||
2011 | } | ||
2012 | case XFS_BMAP_CRC_MAGIC: | ||
2013 | case XFS_BMAP_MAGIC: { | ||
2014 | struct xfs_btree_block *btb = blk; | ||
2015 | |||
2016 | lsn = be64_to_cpu(btb->bb_u.l.bb_lsn); | ||
2017 | uuid = &btb->bb_u.l.bb_uuid; | ||
2018 | break; | ||
2019 | } | ||
2020 | case XFS_AGF_MAGIC: | ||
2021 | lsn = be64_to_cpu(((struct xfs_agf *)blk)->agf_lsn); | ||
2022 | uuid = &((struct xfs_agf *)blk)->agf_uuid; | ||
2023 | break; | ||
2024 | case XFS_AGFL_MAGIC: | ||
2025 | lsn = be64_to_cpu(((struct xfs_agfl *)blk)->agfl_lsn); | ||
2026 | uuid = &((struct xfs_agfl *)blk)->agfl_uuid; | ||
2027 | break; | ||
2028 | case XFS_AGI_MAGIC: | ||
2029 | lsn = be64_to_cpu(((struct xfs_agi *)blk)->agi_lsn); | ||
2030 | uuid = &((struct xfs_agi *)blk)->agi_uuid; | ||
2031 | break; | ||
2032 | case XFS_SYMLINK_MAGIC: | ||
2033 | lsn = be64_to_cpu(((struct xfs_dsymlink_hdr *)blk)->sl_lsn); | ||
2034 | uuid = &((struct xfs_dsymlink_hdr *)blk)->sl_uuid; | ||
2035 | break; | ||
2036 | case XFS_DIR3_BLOCK_MAGIC: | ||
2037 | case XFS_DIR3_DATA_MAGIC: | ||
2038 | case XFS_DIR3_FREE_MAGIC: | ||
2039 | lsn = be64_to_cpu(((struct xfs_dir3_blk_hdr *)blk)->lsn); | ||
2040 | uuid = &((struct xfs_dir3_blk_hdr *)blk)->uuid; | ||
2041 | break; | ||
2042 | case XFS_ATTR3_RMT_MAGIC: | ||
2043 | lsn = be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn); | ||
2044 | uuid = &((struct xfs_attr3_rmt_hdr *)blk)->rm_uuid; | ||
2045 | break; | ||
2046 | case XFS_SB_MAGIC: | ||
2047 | lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn); | ||
2048 | uuid = &((struct xfs_dsb *)blk)->sb_uuid; | ||
2049 | break; | ||
2050 | default: | ||
2051 | break; | ||
2052 | } | ||
2053 | |||
2054 | if (lsn != (xfs_lsn_t)-1) { | ||
2055 | if (!uuid_equal(&mp->m_sb.sb_uuid, uuid)) | ||
2056 | goto recover_immediately; | ||
2057 | return lsn; | ||
2058 | } | ||
2059 | |||
2060 | magicda = be16_to_cpu(((struct xfs_da_blkinfo *)blk)->magic); | ||
2061 | switch (magicda) { | ||
2062 | case XFS_DIR3_LEAF1_MAGIC: | ||
2063 | case XFS_DIR3_LEAFN_MAGIC: | ||
2064 | case XFS_DA3_NODE_MAGIC: | ||
2065 | lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn); | ||
2066 | uuid = &((struct xfs_da3_blkinfo *)blk)->uuid; | ||
2067 | break; | ||
2068 | default: | ||
2069 | break; | ||
2070 | } | ||
2071 | |||
2072 | if (lsn != (xfs_lsn_t)-1) { | ||
2073 | if (!uuid_equal(&mp->m_sb.sb_uuid, uuid)) | ||
2074 | goto recover_immediately; | ||
2075 | return lsn; | ||
2076 | } | ||
2077 | |||
2078 | /* | ||
2079 | * We do individual object checks on dquot and inode buffers as they | ||
2080 | * have their own individual LSN records. Also, we could have a stale | ||
2081 | * buffer here, so we have to at least recognise these buffer types. | ||
2082 | * | ||
2083 | * A noted complexity here is inode unlinked list processing - it logs | ||
2084 | * the inode directly in the buffer, but we don't know which inodes have | ||
2085 | * been modified, and there is no global buffer LSN. Hence we need to | ||
2086 | * recover all inode buffer types immediately. This problem will be | ||
2087 | * fixed by logical logging of the unlinked list modifications. | ||
2088 | */ | ||
2089 | magic16 = be16_to_cpu(*(__be16 *)blk); | ||
2090 | switch (magic16) { | ||
2091 | case XFS_DQUOT_MAGIC: | ||
2092 | case XFS_DINODE_MAGIC: | ||
2093 | goto recover_immediately; | ||
2094 | default: | ||
2095 | break; | ||
2096 | } | ||
2097 | |||
2098 | /* unknown buffer contents, recover immediately */ | ||
2099 | |||
2100 | recover_immediately: | ||
2101 | return (xfs_lsn_t)-1; | ||
2102 | |||
2103 | } | ||
2104 | |||
2105 | /* | ||
1950 | * Validate the recovered buffer is of the correct type and attach the | 2106 | * Validate the recovered buffer is of the correct type and attach the |
1951 | * appropriate buffer operations to them for writeback. Magic numbers are in a | 2107 | * appropriate buffer operations to them for writeback. Magic numbers are in a |
1952 | * few places: | 2108 | * few places: |
@@ -1955,7 +2111,7 @@ xlog_recover_do_inode_buffer( | |||
1955 | * inside a struct xfs_da_blkinfo at the start of the buffer. | 2111 | * inside a struct xfs_da_blkinfo at the start of the buffer. |
1956 | */ | 2112 | */ |
1957 | static void | 2113 | static void |
1958 | xlog_recovery_validate_buf_type( | 2114 | xlog_recover_validate_buf_type( |
1959 | struct xfs_mount *mp, | 2115 | struct xfs_mount *mp, |
1960 | struct xfs_buf *bp, | 2116 | struct xfs_buf *bp, |
1961 | xfs_buf_log_format_t *buf_f) | 2117 | xfs_buf_log_format_t *buf_f) |
@@ -2234,7 +2390,7 @@ xlog_recover_do_reg_buffer( | |||
2234 | * just avoid the verification stage for non-crc filesystems | 2390 | * just avoid the verification stage for non-crc filesystems |
2235 | */ | 2391 | */ |
2236 | if (xfs_sb_version_hascrc(&mp->m_sb)) | 2392 | if (xfs_sb_version_hascrc(&mp->m_sb)) |
2237 | xlog_recovery_validate_buf_type(mp, bp, buf_f); | 2393 | xlog_recover_validate_buf_type(mp, bp, buf_f); |
2238 | } | 2394 | } |
2239 | 2395 | ||
2240 | /* | 2396 | /* |
@@ -2366,7 +2522,7 @@ xfs_qm_dqcheck( | |||
2366 | 2522 | ||
2367 | /* | 2523 | /* |
2368 | * Perform a dquot buffer recovery. | 2524 | * Perform a dquot buffer recovery. |
2369 | * Simple algorithm: if we have found a QUOTAOFF logitem of the same type | 2525 | * Simple algorithm: if we have found a QUOTAOFF log item of the same type |
2370 | * (ie. USR or GRP), then just toss this buffer away; don't recover it. | 2526 | * (ie. USR or GRP), then just toss this buffer away; don't recover it. |
2371 | * Else, treat it as a regular buffer and do recovery. | 2527 | * Else, treat it as a regular buffer and do recovery. |
2372 | */ | 2528 | */ |
@@ -2425,20 +2581,22 @@ xlog_recover_do_dquot_buffer( | |||
2425 | * over the log during recovery. During the first we build a table of | 2581 | * over the log during recovery. During the first we build a table of |
2426 | * those buffers which have been cancelled, and during the second we | 2582 | * those buffers which have been cancelled, and during the second we |
2427 | * only replay those buffers which do not have corresponding cancel | 2583 | * only replay those buffers which do not have corresponding cancel |
2428 | * records in the table. See xlog_recover_do_buffer_pass[1,2] above | 2584 | * records in the table. See xlog_recover_buffer_pass[1,2] above |
2429 | * for more details on the implementation of the table of cancel records. | 2585 | * for more details on the implementation of the table of cancel records. |
2430 | */ | 2586 | */ |
2431 | STATIC int | 2587 | STATIC int |
2432 | xlog_recover_buffer_pass2( | 2588 | xlog_recover_buffer_pass2( |
2433 | struct xlog *log, | 2589 | struct xlog *log, |
2434 | struct list_head *buffer_list, | 2590 | struct list_head *buffer_list, |
2435 | struct xlog_recover_item *item) | 2591 | struct xlog_recover_item *item, |
2592 | xfs_lsn_t current_lsn) | ||
2436 | { | 2593 | { |
2437 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; | 2594 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
2438 | xfs_mount_t *mp = log->l_mp; | 2595 | xfs_mount_t *mp = log->l_mp; |
2439 | xfs_buf_t *bp; | 2596 | xfs_buf_t *bp; |
2440 | int error; | 2597 | int error; |
2441 | uint buf_flags; | 2598 | uint buf_flags; |
2599 | xfs_lsn_t lsn; | ||
2442 | 2600 | ||
2443 | /* | 2601 | /* |
2444 | * In this pass we only want to recover all the buffers which have | 2602 | * In this pass we only want to recover all the buffers which have |
@@ -2463,10 +2621,17 @@ xlog_recover_buffer_pass2( | |||
2463 | error = bp->b_error; | 2621 | error = bp->b_error; |
2464 | if (error) { | 2622 | if (error) { |
2465 | xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)"); | 2623 | xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)"); |
2466 | xfs_buf_relse(bp); | 2624 | goto out_release; |
2467 | return error; | ||
2468 | } | 2625 | } |
2469 | 2626 | ||
2627 | /* | ||
2628 | * Skip the buffer only if we get a valid LSN from it and that LSN is not | ||
2629 | * older than the lsn of the transaction we are replaying. | ||
2630 | */ | ||
2631 | lsn = xlog_recover_get_buf_lsn(mp, bp); | ||
2632 | if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) | ||
2633 | goto out_release; | ||
2634 | |||
2470 | if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { | 2635 | if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { |
2471 | error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); | 2636 | error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f); |
2472 | } else if (buf_f->blf_flags & | 2637 | } else if (buf_f->blf_flags & |
@@ -2476,7 +2641,7 @@ xlog_recover_buffer_pass2( | |||
2476 | xlog_recover_do_reg_buffer(mp, item, bp, buf_f); | 2641 | xlog_recover_do_reg_buffer(mp, item, bp, buf_f); |
2477 | } | 2642 | } |
2478 | if (error) | 2643 | if (error) |
2479 | return XFS_ERROR(error); | 2644 | goto out_release; |
2480 | 2645 | ||
2481 | /* | 2646 | /* |
2482 | * Perform delayed write on the buffer. Asynchronous writes will be | 2647 | * Perform delayed write on the buffer. Asynchronous writes will be |
@@ -2505,15 +2670,93 @@ xlog_recover_buffer_pass2( | |||
2505 | xfs_buf_delwri_queue(bp, buffer_list); | 2670 | xfs_buf_delwri_queue(bp, buffer_list); |
2506 | } | 2671 | } |
2507 | 2672 | ||
2673 | out_release: | ||
2508 | xfs_buf_relse(bp); | 2674 | xfs_buf_relse(bp); |
2509 | return error; | 2675 | return error; |
2510 | } | 2676 | } |
2511 | 2677 | ||
2678 | /* | ||
2679 | * Inode fork owner changes | ||
2680 | * | ||
2681 | * If we have been told that we have to reparent the inode fork, it's because an | ||
2682 | * extent swap operation on a CRC enabled filesystem has been done and we are | ||
2683 | * replaying it. We need to walk the BMBT of the appropriate fork and change the | ||
2684 | * owners of it. | ||
2685 | * | ||
2686 | * The complexity here is that we don't have an inode context to work with, so | ||
2687 | * after we've replayed the inode we need to instantiate one. This is where the | ||
2688 | * fun begins. | ||
2689 | * | ||
2690 | * We are in the middle of log recovery, so we can't run transactions. That | ||
2691 | * means we cannot use cache coherent inode instantiation via xfs_iget(), as | ||
2692 | * that will result in the corresponding iput() running the inode through | ||
2693 | * xfs_inactive(). If we've just replayed an inode core that changes the link | ||
2694 | * count to zero (i.e. it's been unlinked), then xfs_inactive() will run | ||
2695 | * transactions (bad!). | ||
2696 | * | ||
2697 | * So, to avoid this, we instantiate an inode directly from the inode core we've | ||
2698 | * just recovered. We have the buffer still locked, and all we really need to | ||
2699 | * instantiate is the inode core and the forks being modified. We can do this | ||
2700 | * manually, then run the inode btree owner change, and then tear down the | ||
2701 | * xfs_inode without having to run any transactions at all. | ||
2702 | * | ||
2703 | * Also, we don't have a transaction context available here but need to | ||
2704 | * gather all the buffers we modify for writeback, so we pass the buffer_list | ||
2705 | * instead for the operation to use. | ||
2706 | */ | ||
2707 | |||
2708 | STATIC int | ||
2709 | xfs_recover_inode_owner_change( | ||
2710 | struct xfs_mount *mp, | ||
2711 | struct xfs_dinode *dip, | ||
2712 | struct xfs_inode_log_format *in_f, | ||
2713 | struct list_head *buffer_list) | ||
2714 | { | ||
2715 | struct xfs_inode *ip; | ||
2716 | int error; | ||
2717 | |||
2718 | ASSERT(in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)); | ||
2719 | |||
2720 | ip = xfs_inode_alloc(mp, in_f->ilf_ino); | ||
2721 | if (!ip) | ||
2722 | return ENOMEM; | ||
2723 | |||
2724 | /* instantiate the inode */ | ||
2725 | xfs_dinode_from_disk(&ip->i_d, dip); | ||
2726 | ASSERT(ip->i_d.di_version >= 3); | ||
2727 | |||
2728 | error = xfs_iformat_fork(ip, dip); | ||
2729 | if (error) | ||
2730 | goto out_free_ip; | ||
2731 | |||
2732 | |||
2733 | if (in_f->ilf_fields & XFS_ILOG_DOWNER) { | ||
2734 | ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT); | ||
2735 | error = xfs_bmbt_change_owner(NULL, ip, XFS_DATA_FORK, | ||
2736 | ip->i_ino, buffer_list); | ||
2737 | if (error) | ||
2738 | goto out_free_ip; | ||
2739 | } | ||
2740 | |||
2741 | if (in_f->ilf_fields & XFS_ILOG_AOWNER) { | ||
2742 | ASSERT(in_f->ilf_fields & XFS_ILOG_ABROOT); | ||
2743 | error = xfs_bmbt_change_owner(NULL, ip, XFS_ATTR_FORK, | ||
2744 | ip->i_ino, buffer_list); | ||
2745 | if (error) | ||
2746 | goto out_free_ip; | ||
2747 | } | ||
2748 | |||
2749 | out_free_ip: | ||
2750 | xfs_inode_free(ip); | ||
2751 | return error; | ||
2752 | } | ||
2753 | |||
2512 | STATIC int | 2754 | STATIC int |
2513 | xlog_recover_inode_pass2( | 2755 | xlog_recover_inode_pass2( |
2514 | struct xlog *log, | 2756 | struct xlog *log, |
2515 | struct list_head *buffer_list, | 2757 | struct list_head *buffer_list, |
2516 | struct xlog_recover_item *item) | 2758 | struct xlog_recover_item *item, |
2759 | xfs_lsn_t current_lsn) | ||
2517 | { | 2760 | { |
2518 | xfs_inode_log_format_t *in_f; | 2761 | xfs_inode_log_format_t *in_f; |
2519 | xfs_mount_t *mp = log->l_mp; | 2762 | xfs_mount_t *mp = log->l_mp; |
@@ -2560,8 +2803,7 @@ xlog_recover_inode_pass2( | |||
2560 | error = bp->b_error; | 2803 | error = bp->b_error; |
2561 | if (error) { | 2804 | if (error) { |
2562 | xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#2)"); | 2805 | xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#2)"); |
2563 | xfs_buf_relse(bp); | 2806 | goto out_release; |
2564 | goto error; | ||
2565 | } | 2807 | } |
2566 | ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); | 2808 | ASSERT(in_f->ilf_fields & XFS_ILOG_CORE); |
2567 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset); | 2809 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset); |
@@ -2571,25 +2813,40 @@ xlog_recover_inode_pass2( | |||
2571 | * like an inode! | 2813 | * like an inode! |
2572 | */ | 2814 | */ |
2573 | if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) { | 2815 | if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) { |
2574 | xfs_buf_relse(bp); | ||
2575 | xfs_alert(mp, | 2816 | xfs_alert(mp, |
2576 | "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld", | 2817 | "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld", |
2577 | __func__, dip, bp, in_f->ilf_ino); | 2818 | __func__, dip, bp, in_f->ilf_ino); |
2578 | XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", | 2819 | XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", |
2579 | XFS_ERRLEVEL_LOW, mp); | 2820 | XFS_ERRLEVEL_LOW, mp); |
2580 | error = EFSCORRUPTED; | 2821 | error = EFSCORRUPTED; |
2581 | goto error; | 2822 | goto out_release; |
2582 | } | 2823 | } |
2583 | dicp = item->ri_buf[1].i_addr; | 2824 | dicp = item->ri_buf[1].i_addr; |
2584 | if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { | 2825 | if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { |
2585 | xfs_buf_relse(bp); | ||
2586 | xfs_alert(mp, | 2826 | xfs_alert(mp, |
2587 | "%s: Bad inode log record, rec ptr 0x%p, ino %Ld", | 2827 | "%s: Bad inode log record, rec ptr 0x%p, ino %Ld", |
2588 | __func__, item, in_f->ilf_ino); | 2828 | __func__, item, in_f->ilf_ino); |
2589 | XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", | 2829 | XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", |
2590 | XFS_ERRLEVEL_LOW, mp); | 2830 | XFS_ERRLEVEL_LOW, mp); |
2591 | error = EFSCORRUPTED; | 2831 | error = EFSCORRUPTED; |
2592 | goto error; | 2832 | goto out_release; |
2833 | } | ||
2834 | |||
2835 | /* | ||
2836 | * If the inode has an LSN in it, recover the inode only if it's less | ||
2837 | * than the lsn of the transaction we are replaying. Note: we still | ||
2838 | * need to replay an owner change even though the inode is more recent | ||
2839 | * than the transaction as there is no guarantee that all the btree | ||
2840 | * blocks are more recent than this transaction, too. | ||
2841 | */ | ||
2842 | if (dip->di_version >= 3) { | ||
2843 | xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn); | ||
2844 | |||
2845 | if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { | ||
2846 | trace_xfs_log_recover_inode_skip(log, in_f); | ||
2847 | error = 0; | ||
2848 | goto out_owner_change; | ||
2849 | } | ||
2593 | } | 2850 | } |
2594 | 2851 | ||
2595 | /* | 2852 | /* |
@@ -2610,10 +2867,9 @@ xlog_recover_inode_pass2( | |||
2610 | dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) { | 2867 | dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) { |
2611 | /* do nothing */ | 2868 | /* do nothing */ |
2612 | } else { | 2869 | } else { |
2613 | xfs_buf_relse(bp); | ||
2614 | trace_xfs_log_recover_inode_skip(log, in_f); | 2870 | trace_xfs_log_recover_inode_skip(log, in_f); |
2615 | error = 0; | 2871 | error = 0; |
2616 | goto error; | 2872 | goto out_release; |
2617 | } | 2873 | } |
2618 | } | 2874 | } |
2619 | 2875 | ||
@@ -2625,13 +2881,12 @@ xlog_recover_inode_pass2( | |||
2625 | (dicp->di_format != XFS_DINODE_FMT_BTREE)) { | 2881 | (dicp->di_format != XFS_DINODE_FMT_BTREE)) { |
2626 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", | 2882 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", |
2627 | XFS_ERRLEVEL_LOW, mp, dicp); | 2883 | XFS_ERRLEVEL_LOW, mp, dicp); |
2628 | xfs_buf_relse(bp); | ||
2629 | xfs_alert(mp, | 2884 | xfs_alert(mp, |
2630 | "%s: Bad regular inode log record, rec ptr 0x%p, " | 2885 | "%s: Bad regular inode log record, rec ptr 0x%p, " |
2631 | "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", | 2886 | "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", |
2632 | __func__, item, dip, bp, in_f->ilf_ino); | 2887 | __func__, item, dip, bp, in_f->ilf_ino); |
2633 | error = EFSCORRUPTED; | 2888 | error = EFSCORRUPTED; |
2634 | goto error; | 2889 | goto out_release; |
2635 | } | 2890 | } |
2636 | } else if (unlikely(S_ISDIR(dicp->di_mode))) { | 2891 | } else if (unlikely(S_ISDIR(dicp->di_mode))) { |
2637 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && | 2892 | if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) && |
@@ -2639,19 +2894,17 @@ xlog_recover_inode_pass2( | |||
2639 | (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { | 2894 | (dicp->di_format != XFS_DINODE_FMT_LOCAL)) { |
2640 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", | 2895 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", |
2641 | XFS_ERRLEVEL_LOW, mp, dicp); | 2896 | XFS_ERRLEVEL_LOW, mp, dicp); |
2642 | xfs_buf_relse(bp); | ||
2643 | xfs_alert(mp, | 2897 | xfs_alert(mp, |
2644 | "%s: Bad dir inode log record, rec ptr 0x%p, " | 2898 | "%s: Bad dir inode log record, rec ptr 0x%p, " |
2645 | "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", | 2899 | "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", |
2646 | __func__, item, dip, bp, in_f->ilf_ino); | 2900 | __func__, item, dip, bp, in_f->ilf_ino); |
2647 | error = EFSCORRUPTED; | 2901 | error = EFSCORRUPTED; |
2648 | goto error; | 2902 | goto out_release; |
2649 | } | 2903 | } |
2650 | } | 2904 | } |
2651 | if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ | 2905 | if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){ |
2652 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", | 2906 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", |
2653 | XFS_ERRLEVEL_LOW, mp, dicp); | 2907 | XFS_ERRLEVEL_LOW, mp, dicp); |
2654 | xfs_buf_relse(bp); | ||
2655 | xfs_alert(mp, | 2908 | xfs_alert(mp, |
2656 | "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " | 2909 | "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " |
2657 | "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", | 2910 | "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", |
@@ -2659,29 +2912,27 @@ xlog_recover_inode_pass2( | |||
2659 | dicp->di_nextents + dicp->di_anextents, | 2912 | dicp->di_nextents + dicp->di_anextents, |
2660 | dicp->di_nblocks); | 2913 | dicp->di_nblocks); |
2661 | error = EFSCORRUPTED; | 2914 | error = EFSCORRUPTED; |
2662 | goto error; | 2915 | goto out_release; |
2663 | } | 2916 | } |
2664 | if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { | 2917 | if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) { |
2665 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", | 2918 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", |
2666 | XFS_ERRLEVEL_LOW, mp, dicp); | 2919 | XFS_ERRLEVEL_LOW, mp, dicp); |
2667 | xfs_buf_relse(bp); | ||
2668 | xfs_alert(mp, | 2920 | xfs_alert(mp, |
2669 | "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " | 2921 | "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, " |
2670 | "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__, | 2922 | "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__, |
2671 | item, dip, bp, in_f->ilf_ino, dicp->di_forkoff); | 2923 | item, dip, bp, in_f->ilf_ino, dicp->di_forkoff); |
2672 | error = EFSCORRUPTED; | 2924 | error = EFSCORRUPTED; |
2673 | goto error; | 2925 | goto out_release; |
2674 | } | 2926 | } |
2675 | isize = xfs_icdinode_size(dicp->di_version); | 2927 | isize = xfs_icdinode_size(dicp->di_version); |
2676 | if (unlikely(item->ri_buf[1].i_len > isize)) { | 2928 | if (unlikely(item->ri_buf[1].i_len > isize)) { |
2677 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", | 2929 | XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", |
2678 | XFS_ERRLEVEL_LOW, mp, dicp); | 2930 | XFS_ERRLEVEL_LOW, mp, dicp); |
2679 | xfs_buf_relse(bp); | ||
2680 | xfs_alert(mp, | 2931 | xfs_alert(mp, |
2681 | "%s: Bad inode log record length %d, rec ptr 0x%p", | 2932 | "%s: Bad inode log record length %d, rec ptr 0x%p", |
2682 | __func__, item->ri_buf[1].i_len, item); | 2933 | __func__, item->ri_buf[1].i_len, item); |
2683 | error = EFSCORRUPTED; | 2934 | error = EFSCORRUPTED; |
2684 | goto error; | 2935 | goto out_release; |
2685 | } | 2936 | } |
2686 | 2937 | ||
2687 | /* The core is in in-core format */ | 2938 | /* The core is in in-core format */ |
@@ -2707,7 +2958,7 @@ xlog_recover_inode_pass2( | |||
2707 | } | 2958 | } |
2708 | 2959 | ||
2709 | if (in_f->ilf_size == 2) | 2960 | if (in_f->ilf_size == 2) |
2710 | goto write_inode_buffer; | 2961 | goto out_owner_change; |
2711 | len = item->ri_buf[2].i_len; | 2962 | len = item->ri_buf[2].i_len; |
2712 | src = item->ri_buf[2].i_addr; | 2963 | src = item->ri_buf[2].i_addr; |
2713 | ASSERT(in_f->ilf_size <= 4); | 2964 | ASSERT(in_f->ilf_size <= 4); |
@@ -2768,19 +3019,23 @@ xlog_recover_inode_pass2( | |||
2768 | default: | 3019 | default: |
2769 | xfs_warn(log->l_mp, "%s: Invalid flag", __func__); | 3020 | xfs_warn(log->l_mp, "%s: Invalid flag", __func__); |
2770 | ASSERT(0); | 3021 | ASSERT(0); |
2771 | xfs_buf_relse(bp); | ||
2772 | error = EIO; | 3022 | error = EIO; |
2773 | goto error; | 3023 | goto out_release; |
2774 | } | 3024 | } |
2775 | } | 3025 | } |
2776 | 3026 | ||
2777 | write_inode_buffer: | 3027 | out_owner_change: |
3028 | if (in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER)) | ||
3029 | error = xfs_recover_inode_owner_change(mp, dip, in_f, | ||
3030 | buffer_list); | ||
2778 | /* re-generate the checksum. */ | 3031 | /* re-generate the checksum. */ |
2779 | xfs_dinode_calc_crc(log->l_mp, dip); | 3032 | xfs_dinode_calc_crc(log->l_mp, dip); |
2780 | 3033 | ||
2781 | ASSERT(bp->b_target->bt_mount == mp); | 3034 | ASSERT(bp->b_target->bt_mount == mp); |
2782 | bp->b_iodone = xlog_recover_iodone; | 3035 | bp->b_iodone = xlog_recover_iodone; |
2783 | xfs_buf_delwri_queue(bp, buffer_list); | 3036 | xfs_buf_delwri_queue(bp, buffer_list); |
3037 | |||
3038 | out_release: | ||
2784 | xfs_buf_relse(bp); | 3039 | xfs_buf_relse(bp); |
2785 | error: | 3040 | error: |
2786 | if (need_free) | 3041 | if (need_free) |
@@ -2822,7 +3077,8 @@ STATIC int | |||
2822 | xlog_recover_dquot_pass2( | 3077 | xlog_recover_dquot_pass2( |
2823 | struct xlog *log, | 3078 | struct xlog *log, |
2824 | struct list_head *buffer_list, | 3079 | struct list_head *buffer_list, |
2825 | struct xlog_recover_item *item) | 3080 | struct xlog_recover_item *item, |
3081 | xfs_lsn_t current_lsn) | ||
2826 | { | 3082 | { |
2827 | xfs_mount_t *mp = log->l_mp; | 3083 | xfs_mount_t *mp = log->l_mp; |
2828 | xfs_buf_t *bp; | 3084 | xfs_buf_t *bp; |
@@ -2896,6 +3152,19 @@ xlog_recover_dquot_pass2( | |||
2896 | return XFS_ERROR(EIO); | 3152 | return XFS_ERROR(EIO); |
2897 | } | 3153 | } |
2898 | 3154 | ||
3155 | /* | ||
3156 | * If the dquot has an LSN in it, recover the dquot only if it's less | ||
3157 | * than the lsn of the transaction we are replaying. | ||
3158 | */ | ||
3159 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
3160 | struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq; | ||
3161 | xfs_lsn_t lsn = be64_to_cpu(dqb->dd_lsn); | ||
3162 | |||
3163 | if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { | ||
3164 | goto out_release; | ||
3165 | } | ||
3166 | } | ||
3167 | |||
2899 | memcpy(ddq, recddq, item->ri_buf[1].i_len); | 3168 | memcpy(ddq, recddq, item->ri_buf[1].i_len); |
2900 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | 3169 | if (xfs_sb_version_hascrc(&mp->m_sb)) { |
2901 | xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk), | 3170 | xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk), |
@@ -2906,9 +3175,10 @@ xlog_recover_dquot_pass2( | |||
2906 | ASSERT(bp->b_target->bt_mount == mp); | 3175 | ASSERT(bp->b_target->bt_mount == mp); |
2907 | bp->b_iodone = xlog_recover_iodone; | 3176 | bp->b_iodone = xlog_recover_iodone; |
2908 | xfs_buf_delwri_queue(bp, buffer_list); | 3177 | xfs_buf_delwri_queue(bp, buffer_list); |
2909 | xfs_buf_relse(bp); | ||
2910 | 3178 | ||
2911 | return (0); | 3179 | out_release: |
3180 | xfs_buf_relse(bp); | ||
3181 | return 0; | ||
2912 | } | 3182 | } |
2913 | 3183 | ||
2914 | /* | 3184 | /* |
@@ -3116,6 +3386,106 @@ xlog_recover_free_trans( | |||
3116 | kmem_free(trans); | 3386 | kmem_free(trans); |
3117 | } | 3387 | } |
3118 | 3388 | ||
3389 | STATIC void | ||
3390 | xlog_recover_buffer_ra_pass2( | ||
3391 | struct xlog *log, | ||
3392 | struct xlog_recover_item *item) | ||
3393 | { | ||
3394 | struct xfs_buf_log_format *buf_f = item->ri_buf[0].i_addr; | ||
3395 | struct xfs_mount *mp = log->l_mp; | ||
3396 | |||
3397 | if (xlog_peek_buffer_cancelled(log, buf_f->blf_blkno, | ||
3398 | buf_f->blf_len, buf_f->blf_flags)) { | ||
3399 | return; | ||
3400 | } | ||
3401 | |||
3402 | xfs_buf_readahead(mp->m_ddev_targp, buf_f->blf_blkno, | ||
3403 | buf_f->blf_len, NULL); | ||
3404 | } | ||
3405 | |||
3406 | STATIC void | ||
3407 | xlog_recover_inode_ra_pass2( | ||
3408 | struct xlog *log, | ||
3409 | struct xlog_recover_item *item) | ||
3410 | { | ||
3411 | struct xfs_inode_log_format ilf_buf; | ||
3412 | struct xfs_inode_log_format *ilfp; | ||
3413 | struct xfs_mount *mp = log->l_mp; | ||
3414 | int error; | ||
3415 | |||
3416 | if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) { | ||
3417 | ilfp = item->ri_buf[0].i_addr; | ||
3418 | } else { | ||
3419 | ilfp = &ilf_buf; | ||
3420 | memset(ilfp, 0, sizeof(*ilfp)); | ||
3421 | error = xfs_inode_item_format_convert(&item->ri_buf[0], ilfp); | ||
3422 | if (error) | ||
3423 | return; | ||
3424 | } | ||
3425 | |||
3426 | if (xlog_peek_buffer_cancelled(log, ilfp->ilf_blkno, ilfp->ilf_len, 0)) | ||
3427 | return; | ||
3428 | |||
3429 | xfs_buf_readahead(mp->m_ddev_targp, ilfp->ilf_blkno, | ||
3430 | ilfp->ilf_len, &xfs_inode_buf_ra_ops); | ||
3431 | } | ||
3432 | |||
3433 | STATIC void | ||
3434 | xlog_recover_dquot_ra_pass2( | ||
3435 | struct xlog *log, | ||
3436 | struct xlog_recover_item *item) | ||
3437 | { | ||
3438 | struct xfs_mount *mp = log->l_mp; | ||
3439 | struct xfs_disk_dquot *recddq; | ||
3440 | struct xfs_dq_logformat *dq_f; | ||
3441 | uint type; | ||
3442 | |||
3443 | |||
3444 | if (mp->m_qflags == 0) | ||
3445 | return; | ||
3446 | |||
3447 | recddq = item->ri_buf[1].i_addr; | ||
3448 | if (recddq == NULL) | ||
3449 | return; | ||
3450 | if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot)) | ||
3451 | return; | ||
3452 | |||
3453 | type = recddq->d_flags & (XFS_DQ_USER | XFS_DQ_PROJ | XFS_DQ_GROUP); | ||
3454 | ASSERT(type); | ||
3455 | if (log->l_quotaoffs_flag & type) | ||
3456 | return; | ||
3457 | |||
3458 | dq_f = item->ri_buf[0].i_addr; | ||
3459 | ASSERT(dq_f); | ||
3460 | ASSERT(dq_f->qlf_len == 1); | ||
3461 | |||
3462 | xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno, | ||
3463 | XFS_FSB_TO_BB(mp, dq_f->qlf_len), NULL); | ||
3464 | } | ||
3465 | |||
3466 | STATIC void | ||
3467 | xlog_recover_ra_pass2( | ||
3468 | struct xlog *log, | ||
3469 | struct xlog_recover_item *item) | ||
3470 | { | ||
3471 | switch (ITEM_TYPE(item)) { | ||
3472 | case XFS_LI_BUF: | ||
3473 | xlog_recover_buffer_ra_pass2(log, item); | ||
3474 | break; | ||
3475 | case XFS_LI_INODE: | ||
3476 | xlog_recover_inode_ra_pass2(log, item); | ||
3477 | break; | ||
3478 | case XFS_LI_DQUOT: | ||
3479 | xlog_recover_dquot_ra_pass2(log, item); | ||
3480 | break; | ||
3481 | case XFS_LI_EFI: | ||
3482 | case XFS_LI_EFD: | ||
3483 | case XFS_LI_QUOTAOFF: | ||
3484 | default: | ||
3485 | break; | ||
3486 | } | ||
3487 | } | ||
3488 | |||
3119 | STATIC int | 3489 | STATIC int |
3120 | xlog_recover_commit_pass1( | 3490 | xlog_recover_commit_pass1( |
3121 | struct xlog *log, | 3491 | struct xlog *log, |
@@ -3155,15 +3525,18 @@ xlog_recover_commit_pass2( | |||
3155 | 3525 | ||
3156 | switch (ITEM_TYPE(item)) { | 3526 | switch (ITEM_TYPE(item)) { |
3157 | case XFS_LI_BUF: | 3527 | case XFS_LI_BUF: |
3158 | return xlog_recover_buffer_pass2(log, buffer_list, item); | 3528 | return xlog_recover_buffer_pass2(log, buffer_list, item, |
3529 | trans->r_lsn); | ||
3159 | case XFS_LI_INODE: | 3530 | case XFS_LI_INODE: |
3160 | return xlog_recover_inode_pass2(log, buffer_list, item); | 3531 | return xlog_recover_inode_pass2(log, buffer_list, item, |
3532 | trans->r_lsn); | ||
3161 | case XFS_LI_EFI: | 3533 | case XFS_LI_EFI: |
3162 | return xlog_recover_efi_pass2(log, item, trans->r_lsn); | 3534 | return xlog_recover_efi_pass2(log, item, trans->r_lsn); |
3163 | case XFS_LI_EFD: | 3535 | case XFS_LI_EFD: |
3164 | return xlog_recover_efd_pass2(log, item); | 3536 | return xlog_recover_efd_pass2(log, item); |
3165 | case XFS_LI_DQUOT: | 3537 | case XFS_LI_DQUOT: |
3166 | return xlog_recover_dquot_pass2(log, buffer_list, item); | 3538 | return xlog_recover_dquot_pass2(log, buffer_list, item, |
3539 | trans->r_lsn); | ||
3167 | case XFS_LI_ICREATE: | 3540 | case XFS_LI_ICREATE: |
3168 | return xlog_recover_do_icreate_pass2(log, buffer_list, item); | 3541 | return xlog_recover_do_icreate_pass2(log, buffer_list, item); |
3169 | case XFS_LI_QUOTAOFF: | 3542 | case XFS_LI_QUOTAOFF: |
@@ -3177,6 +3550,26 @@ xlog_recover_commit_pass2( | |||
3177 | } | 3550 | } |
3178 | } | 3551 | } |
3179 | 3552 | ||
3553 | STATIC int | ||
3554 | xlog_recover_items_pass2( | ||
3555 | struct xlog *log, | ||
3556 | struct xlog_recover *trans, | ||
3557 | struct list_head *buffer_list, | ||
3558 | struct list_head *item_list) | ||
3559 | { | ||
3560 | struct xlog_recover_item *item; | ||
3561 | int error = 0; | ||
3562 | |||
3563 | list_for_each_entry(item, item_list, ri_list) { | ||
3564 | error = xlog_recover_commit_pass2(log, trans, | ||
3565 | buffer_list, item); | ||
3566 | if (error) | ||
3567 | return error; | ||
3568 | } | ||
3569 | |||
3570 | return error; | ||
3571 | } | ||
3572 | |||
3180 | /* | 3573 | /* |
3181 | * Perform the transaction. | 3574 | * Perform the transaction. |
3182 | * | 3575 | * |
@@ -3189,9 +3582,16 @@ xlog_recover_commit_trans( | |||
3189 | struct xlog_recover *trans, | 3582 | struct xlog_recover *trans, |
3190 | int pass) | 3583 | int pass) |
3191 | { | 3584 | { |
3192 | int error = 0, error2; | 3585 | int error = 0; |
3193 | xlog_recover_item_t *item; | 3586 | int error2; |
3194 | LIST_HEAD (buffer_list); | 3587 | int items_queued = 0; |
3588 | struct xlog_recover_item *item; | ||
3589 | struct xlog_recover_item *next; | ||
3590 | LIST_HEAD (buffer_list); | ||
3591 | LIST_HEAD (ra_list); | ||
3592 | LIST_HEAD (done_list); | ||
3593 | |||
3594 | #define XLOG_RECOVER_COMMIT_QUEUE_MAX 100 | ||
3195 | 3595 | ||
3196 | hlist_del(&trans->r_list); | 3596 | hlist_del(&trans->r_list); |
3197 | 3597 | ||
@@ -3199,14 +3599,22 @@ xlog_recover_commit_trans( | |||
3199 | if (error) | 3599 | if (error) |
3200 | return error; | 3600 | return error; |
3201 | 3601 | ||
3202 | list_for_each_entry(item, &trans->r_itemq, ri_list) { | 3602 | list_for_each_entry_safe(item, next, &trans->r_itemq, ri_list) { |
3203 | switch (pass) { | 3603 | switch (pass) { |
3204 | case XLOG_RECOVER_PASS1: | 3604 | case XLOG_RECOVER_PASS1: |
3205 | error = xlog_recover_commit_pass1(log, trans, item); | 3605 | error = xlog_recover_commit_pass1(log, trans, item); |
3206 | break; | 3606 | break; |
3207 | case XLOG_RECOVER_PASS2: | 3607 | case XLOG_RECOVER_PASS2: |
3208 | error = xlog_recover_commit_pass2(log, trans, | 3608 | xlog_recover_ra_pass2(log, item); |
3209 | &buffer_list, item); | 3609 | list_move_tail(&item->ri_list, &ra_list); |
3610 | items_queued++; | ||
3611 | if (items_queued >= XLOG_RECOVER_COMMIT_QUEUE_MAX) { | ||
3612 | error = xlog_recover_items_pass2(log, trans, | ||
3613 | &buffer_list, &ra_list); | ||
3614 | list_splice_tail_init(&ra_list, &done_list); | ||
3615 | items_queued = 0; | ||
3616 | } | ||
3617 | |||
3210 | break; | 3618 | break; |
3211 | default: | 3619 | default: |
3212 | ASSERT(0); | 3620 | ASSERT(0); |
@@ -3216,9 +3624,19 @@ xlog_recover_commit_trans( | |||
3216 | goto out; | 3624 | goto out; |
3217 | } | 3625 | } |
3218 | 3626 | ||
3627 | out: | ||
3628 | if (!list_empty(&ra_list)) { | ||
3629 | if (!error) | ||
3630 | error = xlog_recover_items_pass2(log, trans, | ||
3631 | &buffer_list, &ra_list); | ||
3632 | list_splice_tail_init(&ra_list, &done_list); | ||
3633 | } | ||
3634 | |||
3635 | if (!list_empty(&done_list)) | ||
3636 | list_splice_init(&done_list, &trans->r_itemq); | ||
3637 | |||
3219 | xlog_recover_free_trans(trans); | 3638 | xlog_recover_free_trans(trans); |
3220 | 3639 | ||
3221 | out: | ||
3222 | error2 = xfs_buf_delwri_submit(&buffer_list); | 3640 | error2 = xfs_buf_delwri_submit(&buffer_list); |
3223 | return error ? error : error2; | 3641 | return error ? error : error2; |
3224 | } | 3642 | } |
@@ -3376,7 +3794,7 @@ xlog_recover_process_efi( | |||
3376 | } | 3794 | } |
3377 | 3795 | ||
3378 | tp = xfs_trans_alloc(mp, 0); | 3796 | tp = xfs_trans_alloc(mp, 0); |
3379 | error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, 0, 0); | 3797 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); |
3380 | if (error) | 3798 | if (error) |
3381 | goto abort_error; | 3799 | goto abort_error; |
3382 | efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); | 3800 | efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); |
@@ -3482,8 +3900,7 @@ xlog_recover_clear_agi_bucket( | |||
3482 | int error; | 3900 | int error; |
3483 | 3901 | ||
3484 | tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); | 3902 | tp = xfs_trans_alloc(mp, XFS_TRANS_CLEAR_AGI_BUCKET); |
3485 | error = xfs_trans_reserve(tp, 0, XFS_CLEAR_AGI_BUCKET_LOG_RES(mp), | 3903 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_clearagi, 0, 0); |
3486 | 0, 0, 0); | ||
3487 | if (error) | 3904 | if (error) |
3488 | goto out_abort; | 3905 | goto out_abort; |
3489 | 3906 | ||