Diffstat (limited to 'fs/xfs/xfs_log_recover.c')
-rw-r--r--	fs/xfs/xfs_log_recover.c	| 114
1 file changed, 105 insertions, 9 deletions
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 93f03ec17eec..7cf5e4eafe28 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1599,10 +1599,43 @@ xlog_recover_add_to_trans(
 }
 
 /*
- * Sort the log items in the transaction. Cancelled buffers need
- * to be put first so they are processed before any items that might
- * modify the buffers. If they are cancelled, then the modifications
- * don't need to be replayed.
+ * Sort the log items in the transaction.
+ *
+ * The ordering constraints are defined by the inode allocation and unlink
+ * behaviour. The rules are:
+ *
+ * 1. Every item is only logged once in a given transaction. Hence it
+ *    represents the last logged state of the item. Hence ordering is
+ *    dependent on the order in which operations need to be performed so
+ *    required initial conditions are always met.
+ *
+ * 2. Cancelled buffers are recorded in pass 1 in a separate table and
+ *    there's nothing to replay from them so we can simply cull them
+ *    from the transaction. However, we can't do that until after we've
+ *    replayed all the other items because they may be dependent on the
+ *    cancelled buffer and replaying the cancelled buffer can remove it
+ *    from the cancelled buffer table. Hence they have to be done last.
+ *
+ * 3. Inode allocation buffers must be replayed before inode items that
+ *    read the buffer and replay changes into it.
+ *
+ * 4. Inode unlink buffers must be replayed after inode items are replayed.
+ *    This ensures that inodes are completely flushed to the inode buffer
+ *    in a "free" state before we remove the unlinked inode list pointer.
+ *
+ * Hence the ordering needs to be inode allocation buffers first, inode items
+ * second, inode unlink buffers third and cancelled buffers last.
+ *
+ * But there's a problem with that - we can't tell an inode allocation buffer
+ * apart from a regular buffer, so we can't separate them. We can, however,
+ * tell an inode unlink buffer from the others, and so we can separate them out
+ * from all the other buffers and move them to last.
+ *
+ * Hence, 4 lists, in order from head to tail:
+ *	- buffer_list for all buffers except cancelled/inode unlink buffers
+ *	- item_list for all non-buffer items
+ *	- inode_buffer_list for inode unlink buffers
+ *	- cancel_list for the cancelled buffers
  */
 STATIC int
 xlog_recover_reorder_trans(
@@ -1612,6 +1645,10 @@ xlog_recover_reorder_trans(
 {
 	xlog_recover_item_t	*item, *n;
 	LIST_HEAD(sort_list);
+	LIST_HEAD(cancel_list);
+	LIST_HEAD(buffer_list);
+	LIST_HEAD(inode_buffer_list);
+	LIST_HEAD(inode_list);
 
 	list_splice_init(&trans->r_itemq, &sort_list);
 	list_for_each_entry_safe(item, n, &sort_list, ri_list) {
@@ -1619,12 +1656,18 @@ xlog_recover_reorder_trans(
 
 		switch (ITEM_TYPE(item)) {
 		case XFS_LI_BUF:
-			if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) {
+			if (buf_f->blf_flags & XFS_BLF_CANCEL) {
 				trace_xfs_log_recover_item_reorder_head(log,
 							trans, item, pass);
-				list_move(&item->ri_list, &trans->r_itemq);
+				list_move(&item->ri_list, &cancel_list);
+				break;
+			}
+			if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
+				list_move(&item->ri_list, &inode_buffer_list);
 				break;
 			}
+			list_move_tail(&item->ri_list, &buffer_list);
+			break;
 		case XFS_LI_INODE:
 		case XFS_LI_DQUOT:
 		case XFS_LI_QUOTAOFF:
@@ -1632,7 +1675,7 @@ xlog_recover_reorder_trans(
 		case XFS_LI_EFI:
 			trace_xfs_log_recover_item_reorder_tail(log,
 							trans, item, pass);
-			list_move_tail(&item->ri_list, &trans->r_itemq);
+			list_move_tail(&item->ri_list, &inode_list);
 			break;
 		default:
 			xfs_warn(log->l_mp,
@@ -1643,6 +1686,14 @@ xlog_recover_reorder_trans(
 		}
 	}
 	ASSERT(list_empty(&sort_list));
+	if (!list_empty(&buffer_list))
+		list_splice(&buffer_list, &trans->r_itemq);
+	if (!list_empty(&inode_list))
+		list_splice_tail(&inode_list, &trans->r_itemq);
+	if (!list_empty(&inode_buffer_list))
+		list_splice_tail(&inode_buffer_list, &trans->r_itemq);
+	if (!list_empty(&cancel_list))
+		list_splice_tail(&cancel_list, &trans->r_itemq);
 	return 0;
 }
 
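The net effect of the four splices above is a fixed replay order: regular buffers, then inode/dquot/EFI-style items, then inode unlink buffers, then cancelled buffers. The following small userspace sketch only illustrates that ordering; the queue type and splice helpers are simplified stand-ins for the kernel's list_splice()/list_splice_tail() (not the real <linux/list.h> API), and the item names are invented.

/* Userspace sketch of the reorder result; not kernel code. */
#include <stdio.h>
#include <string.h>

#define MAXQ 16

struct queue {
	const char	*items[MAXQ];
	int		n;
};

/* Append all of @src at the tail of @dst, like list_splice_tail(). */
static void splice_tail(struct queue *dst, const struct queue *src)
{
	for (int i = 0; i < src->n; i++)
		dst->items[dst->n++] = src->items[i];
}

/* Prepend all of @src at the head of @dst, like list_splice(). */
static void splice_head(struct queue *dst, const struct queue *src)
{
	memmove(&dst->items[src->n], &dst->items[0],
		dst->n * sizeof(dst->items[0]));
	memcpy(&dst->items[0], &src->items[0],
	       src->n * sizeof(src->items[0]));
	dst->n += src->n;
}

int main(void)
{
	struct queue r_itemq = { .n = 0 };
	struct queue buffer_list = { .items = { "reg buf A", "reg buf B" }, .n = 2 };
	struct queue inode_list = { .items = { "inode item", "dquot item" }, .n = 2 };
	struct queue inode_buffer_list = { .items = { "unlink buf" }, .n = 1 };
	struct queue cancel_list = { .items = { "cancelled buf" }, .n = 1 };

	/* Same splice order as xlog_recover_reorder_trans() above. */
	splice_head(&r_itemq, &buffer_list);
	splice_tail(&r_itemq, &inode_list);
	splice_tail(&r_itemq, &inode_buffer_list);
	splice_tail(&r_itemq, &cancel_list);

	for (int i = 0; i < r_itemq.n; i++)
		printf("%d: %s\n", i, r_itemq.items[i]);
	return 0;
}

Running this prints the buffers first, the non-buffer items second, the unlink buffer third and the cancelled buffer last, which is the replay order the comment block above calls for.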
@@ -1794,7 +1845,13 @@ xlog_recover_do_inode_buffer(
 	xfs_agino_t		*buffer_nextp;
 
 	trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
-	bp->b_ops = &xfs_inode_buf_ops;
+
+	/*
+	 * Post recovery validation only works properly on CRC enabled
+	 * filesystems.
+	 */
+	if (xfs_sb_version_hascrc(&mp->m_sb))
+		bp->b_ops = &xfs_inode_buf_ops;
 
 	inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog;
 	for (i = 0; i < inodes_per_buf; i++) {
@@ -1861,6 +1918,15 @@ xlog_recover_do_inode_buffer(
 		buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,
 					      next_unlinked_offset);
 		*buffer_nextp = *logged_nextp;
+
+		/*
+		 * If necessary, recalculate the CRC in the on-disk inode. We
+		 * have to leave the inode in a consistent state for whoever
+		 * reads it next....
+		 */
+		xfs_dinode_calc_crc(mp, (struct xfs_dinode *)
+				xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
+
 	}
 
 	return 0;
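For reference, a standalone sketch of the offset arithmetic this loop relies on: inodes_per_buf comes from the buffer length shifted down by sb_inodelog, and inode i, the one whose unlinked pointer is patched and whose CRC is then recalculated, starts at i * sb_inodesize within the buffer. The 8 KiB buffer and 512-byte inode size below are assumed example values, not figures taken from the patch.

/* Userspace sketch of the per-inode offset arithmetic; not kernel code. */
#include <stdio.h>

int main(void)
{
	unsigned int buf_bytes = 8192;		/* stands in for BBTOB(bp->b_io_length) */
	unsigned int sb_inodelog = 9;		/* log2 of the assumed 512-byte inode size */
	unsigned int sb_inodesize = 1u << sb_inodelog;
	unsigned int inodes_per_buf = buf_bytes >> sb_inodelog;

	printf("%u inodes per buffer\n", inodes_per_buf);

	/*
	 * Each on-disk inode starts at i * sb_inodesize; that is the offset
	 * handed to xfs_buf_offset() both to patch the unlinked pointer and
	 * to recompute the inode CRC afterwards.
	 */
	for (unsigned int i = 0; i < 3; i++)
		printf("inode %u starts at buffer offset %u\n",
		       i, i * sb_inodesize);
	return 0;
}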
@@ -2097,6 +2163,17 @@ xlog_recover_do_reg_buffer(
 			((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
 
 		/*
+		 * The dirty regions logged in the buffer, even though
+		 * contiguous, may span multiple chunks. This is because the
+		 * dirty region may span a physical page boundary in a buffer
+		 * and hence be split into two separate vectors for writing into
+		 * the log. Hence we need to trim nbits back to the length of
+		 * the current region being copied out of the log.
+		 */
+		if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT))
+			nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT;
+
+		/*
 		 * Do a sanity check if this is a dquot buffer. Just checking
 		 * the first dquot in the buffer should do. XXXThis is
 		 * probably a good thing to do for other buf types also.
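A worked example of the nbits trim may help. Assuming XFS_BLF_SHIFT is 7 (128-byte logging chunks, per my reading of the XFS buf-item headers; treat that constant as an assumption), a dirty region of 5 chunks that was split across log vectors can arrive with only 384 bytes in the current vector, so only 3 chunks should be copied from it.

/* Standalone sketch of the nbits trim; the values are made-up examples. */
#include <stdio.h>

#define XFS_BLF_SHIFT	7	/* assumed: log2 of the 128-byte chunk size */

int main(void)
{
	unsigned int nbits = 5;		/* contiguous bits found in the dirty bitmap */
	unsigned int i_len = 384;	/* bytes actually present in this log vector */

	/* The region was split across log vectors: copy only what we have. */
	if (i_len < (nbits << XFS_BLF_SHIFT))
		nbits = i_len >> XFS_BLF_SHIFT;

	printf("copy %u chunks = %u bytes from this vector\n",
	       nbits, nbits << XFS_BLF_SHIFT);
	return 0;
}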
@@ -2134,7 +2211,16 @@ xlog_recover_do_reg_buffer(
 	/* Shouldn't be any more regions */
 	ASSERT(i == item->ri_total);
 
-	xlog_recovery_validate_buf_type(mp, bp, buf_f);
+	/*
+	 * We can only do post recovery validation on items on CRC enabled
+	 * filesystems as we need to know when the buffer was written to be able
+	 * to determine if we should have replayed the item. If we replay old
+	 * metadata over a newer buffer, then it will enter a temporarily
+	 * inconsistent state resulting in verification failures. Hence for now
+	 * just avoid the verification stage for non-crc filesystems.
+	 */
+	if (xfs_sb_version_hascrc(&mp->m_sb))
+		xlog_recovery_validate_buf_type(mp, bp, buf_f);
 }
 
 /*
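The gate itself is simple, but for clarity here is a minimal standalone sketch of the pattern used in this hunk and in the inode-buffer hunk above: verification (and verifier attachment) only happens when the superblock advertises CRCs, i.e. a v5 format filesystem. The struct and helper below are simplified stand-ins, not the real xfs_sb or xfs_sb_version_hascrc().

/* Userspace sketch of the CRC-gating pattern; not the real XFS types. */
#include <stdbool.h>
#include <stdio.h>

struct example_sb {
	unsigned int	version;	/* stand-in for XFS_SB_VERSION_NUM(sbp) */
};

static bool example_has_crc(const struct example_sb *sb)
{
	return sb->version == 5;	/* v5 superblocks carry metadata CRCs */
}

static void replay_buffer(const struct example_sb *sb)
{
	/* ... copy logged regions into the buffer ... */
	if (example_has_crc(sb))
		printf("CRC filesystem: run buffer type validation\n");
	else
		printf("pre-v5 filesystem: skip validation, buffer age unknown\n");
}

int main(void)
{
	struct example_sb v4 = { .version = 4 }, v5 = { .version = 5 };

	replay_buffer(&v4);
	replay_buffer(&v5);
	return 0;
}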
@@ -2255,6 +2341,12 @@ xfs_qm_dqcheck(
 	d->dd_diskdq.d_flags = type;
 	d->dd_diskdq.d_id = cpu_to_be32(id);
 
+	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+		uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
+		xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
+				 XFS_DQUOT_CRC_OFF);
+	}
+
 	return errs;
 }
 
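The same stamp-then-checksum pattern appears in this hunk and in the recovery hunk that follows: write the filesystem UUID (repair path) or the replayed contents (recovery path) into the dquot block, then recompute the checksum over the whole struct xfs_dqblk with the CRC field at XFS_DQUOT_CRC_OFF. A rough standalone sketch follows; the struct layout is a simplified stand-in and placeholder_checksum() is deliberately not the CRC32c that xfs_update_cksum() really uses, it only marks where the real calculation happens.

/* Userspace sketch of "stamp fields, then re-checksum the whole block". */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct example_dqblk {
	uint32_t	d_id;		/* stand-in for the disk dquot core */
	uint8_t		d_flags;
	uint8_t		pad[3];
	uint32_t	dd_crc;		/* checksum over the whole structure */
	uint8_t		dd_uuid[16];	/* filesystem UUID, v5 only */
};

#define EXAMPLE_DQUOT_CRC_OFF	offsetof(struct example_dqblk, dd_crc)

/* Placeholder checksum: zero the CRC field, sum the rest, store the result. */
static void placeholder_checksum(void *buf, size_t len, size_t crc_off)
{
	uint8_t *p = buf;
	uint32_t sum = 0;

	memset(p + crc_off, 0, sizeof(uint32_t));
	for (size_t i = 0; i < len; i++)
		sum = sum * 31 + p[i];
	memcpy(p + crc_off, &sum, sizeof(sum));
}

int main(void)
{
	struct example_dqblk d = { .d_id = 42, .d_flags = 1 };
	uint8_t fs_uuid[16] = { 0xde, 0xad, 0xbe, 0xef };

	/* Repair path: stamp the UUID first, then make the checksum match. */
	memcpy(d.dd_uuid, fs_uuid, sizeof(d.dd_uuid));
	placeholder_checksum(&d, sizeof(d), EXAMPLE_DQUOT_CRC_OFF);

	printf("dquot %u checksummed: 0x%08x\n", d.d_id, (unsigned int)d.dd_crc);
	return 0;
}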
@@ -2782,6 +2874,10 @@ xlog_recover_dquot_pass2(
 	}
 
 	memcpy(ddq, recddq, item->ri_buf[1].i_len);
+	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+		xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
+				 XFS_DQUOT_CRC_OFF);
+	}
+
 	ASSERT(dq_f->qlf_size == 2);
 	ASSERT(bp->b_target->bt_mount == mp);