aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_log_recover.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_log_recover.c')
-rw-r--r--fs/xfs/xfs_log_recover.c114
1 files changed, 105 insertions, 9 deletions
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 93f03ec17eec..7cf5e4eafe28 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1599,10 +1599,43 @@ xlog_recover_add_to_trans(
1599} 1599}
1600 1600
1601/* 1601/*
1602 * Sort the log items in the transaction. Cancelled buffers need 1602 * Sort the log items in the transaction.
1603 * to be put first so they are processed before any items that might 1603 *
1604 * modify the buffers. If they are cancelled, then the modifications 1604 * The ordering constraints are defined by the inode allocation and unlink
1605 * don't need to be replayed. 1605 * behaviour. The rules are:
1606 *
1607 * 1. Every item is only logged once in a given transaction. Hence it
1608 * represents the last logged state of the item. Hence ordering is
1609 * dependent on the order in which operations need to be performed so
1610 * required initial conditions are always met.
1611 *
1612 * 2. Cancelled buffers are recorded in pass 1 in a separate table and
1613 * there's nothing to replay from them so we can simply cull them
1614 * from the transaction. However, we can't do that until after we've
1615 * replayed all the other items because they may be dependent on the
1616 * cancelled buffer and replaying the cancelled buffer can remove it
1617 * form the cancelled buffer table. Hence they have tobe done last.
1618 *
1619 * 3. Inode allocation buffers must be replayed before inode items that
1620 * read the buffer and replay changes into it.
1621 *
1622 * 4. Inode unlink buffers must be replayed after inode items are replayed.
1623 * This ensures that inodes are completely flushed to the inode buffer
1624 * in a "free" state before we remove the unlinked inode list pointer.
1625 *
1626 * Hence the ordering needs to be inode allocation buffers first, inode items
1627 * second, inode unlink buffers third and cancelled buffers last.
1628 *
1629 * But there's a problem with that - we can't tell an inode allocation buffer
1630 * apart from a regular buffer, so we can't separate them. We can, however,
1631 * tell an inode unlink buffer from the others, and so we can separate them out
1632 * from all the other buffers and move them to last.
1633 *
1634 * Hence, 4 lists, in order from head to tail:
1635 * - buffer_list for all buffers except cancelled/inode unlink buffers
1636 * - item_list for all non-buffer items
1637 * - inode_buffer_list for inode unlink buffers
1638 * - cancel_list for the cancelled buffers
1606 */ 1639 */
1607STATIC int 1640STATIC int
1608xlog_recover_reorder_trans( 1641xlog_recover_reorder_trans(
@@ -1612,6 +1645,10 @@ xlog_recover_reorder_trans(
1612{ 1645{
1613 xlog_recover_item_t *item, *n; 1646 xlog_recover_item_t *item, *n;
1614 LIST_HEAD(sort_list); 1647 LIST_HEAD(sort_list);
1648 LIST_HEAD(cancel_list);
1649 LIST_HEAD(buffer_list);
1650 LIST_HEAD(inode_buffer_list);
1651 LIST_HEAD(inode_list);
1615 1652
1616 list_splice_init(&trans->r_itemq, &sort_list); 1653 list_splice_init(&trans->r_itemq, &sort_list);
1617 list_for_each_entry_safe(item, n, &sort_list, ri_list) { 1654 list_for_each_entry_safe(item, n, &sort_list, ri_list) {
@@ -1619,12 +1656,18 @@ xlog_recover_reorder_trans(
1619 1656
1620 switch (ITEM_TYPE(item)) { 1657 switch (ITEM_TYPE(item)) {
1621 case XFS_LI_BUF: 1658 case XFS_LI_BUF:
1622 if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) { 1659 if (buf_f->blf_flags & XFS_BLF_CANCEL) {
1623 trace_xfs_log_recover_item_reorder_head(log, 1660 trace_xfs_log_recover_item_reorder_head(log,
1624 trans, item, pass); 1661 trans, item, pass);
1625 list_move(&item->ri_list, &trans->r_itemq); 1662 list_move(&item->ri_list, &cancel_list);
1663 break;
1664 }
1665 if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
1666 list_move(&item->ri_list, &inode_buffer_list);
1626 break; 1667 break;
1627 } 1668 }
1669 list_move_tail(&item->ri_list, &buffer_list);
1670 break;
1628 case XFS_LI_INODE: 1671 case XFS_LI_INODE:
1629 case XFS_LI_DQUOT: 1672 case XFS_LI_DQUOT:
1630 case XFS_LI_QUOTAOFF: 1673 case XFS_LI_QUOTAOFF:
@@ -1632,7 +1675,7 @@ xlog_recover_reorder_trans(
1632 case XFS_LI_EFI: 1675 case XFS_LI_EFI:
1633 trace_xfs_log_recover_item_reorder_tail(log, 1676 trace_xfs_log_recover_item_reorder_tail(log,
1634 trans, item, pass); 1677 trans, item, pass);
1635 list_move_tail(&item->ri_list, &trans->r_itemq); 1678 list_move_tail(&item->ri_list, &inode_list);
1636 break; 1679 break;
1637 default: 1680 default:
1638 xfs_warn(log->l_mp, 1681 xfs_warn(log->l_mp,
@@ -1643,6 +1686,14 @@ xlog_recover_reorder_trans(
1643 } 1686 }
1644 } 1687 }
1645 ASSERT(list_empty(&sort_list)); 1688 ASSERT(list_empty(&sort_list));
1689 if (!list_empty(&buffer_list))
1690 list_splice(&buffer_list, &trans->r_itemq);
1691 if (!list_empty(&inode_list))
1692 list_splice_tail(&inode_list, &trans->r_itemq);
1693 if (!list_empty(&inode_buffer_list))
1694 list_splice_tail(&inode_buffer_list, &trans->r_itemq);
1695 if (!list_empty(&cancel_list))
1696 list_splice_tail(&cancel_list, &trans->r_itemq);
1646 return 0; 1697 return 0;
1647} 1698}
1648 1699
@@ -1794,7 +1845,13 @@ xlog_recover_do_inode_buffer(
1794 xfs_agino_t *buffer_nextp; 1845 xfs_agino_t *buffer_nextp;
1795 1846
1796 trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); 1847 trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
1797 bp->b_ops = &xfs_inode_buf_ops; 1848
1849 /*
1850 * Post recovery validation only works properly on CRC enabled
1851 * filesystems.
1852 */
1853 if (xfs_sb_version_hascrc(&mp->m_sb))
1854 bp->b_ops = &xfs_inode_buf_ops;
1798 1855
1799 inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog; 1856 inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog;
1800 for (i = 0; i < inodes_per_buf; i++) { 1857 for (i = 0; i < inodes_per_buf; i++) {
@@ -1861,6 +1918,15 @@ xlog_recover_do_inode_buffer(
1861 buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp, 1918 buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,
1862 next_unlinked_offset); 1919 next_unlinked_offset);
1863 *buffer_nextp = *logged_nextp; 1920 *buffer_nextp = *logged_nextp;
1921
1922 /*
1923 * If necessary, recalculate the CRC in the on-disk inode. We
1924 * have to leave the inode in a consistent state for whoever
1925 * reads it next....
1926 */
1927 xfs_dinode_calc_crc(mp, (struct xfs_dinode *)
1928 xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
1929
1864 } 1930 }
1865 1931
1866 return 0; 1932 return 0;
@@ -2097,6 +2163,17 @@ xlog_recover_do_reg_buffer(
2097 ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT)); 2163 ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
2098 2164
2099 /* 2165 /*
2166 * The dirty regions logged in the buffer, even though
2167 * contiguous, may span multiple chunks. This is because the
2168 * dirty region may span a physical page boundary in a buffer
2169 * and hence be split into two separate vectors for writing into
2170 * the log. Hence we need to trim nbits back to the length of
2171 * the current region being copied out of the log.
2172 */
2173 if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT))
2174 nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT;
2175
2176 /*
2100 * Do a sanity check if this is a dquot buffer. Just checking 2177 * Do a sanity check if this is a dquot buffer. Just checking
2101 * the first dquot in the buffer should do. XXXThis is 2178 * the first dquot in the buffer should do. XXXThis is
2102 * probably a good thing to do for other buf types also. 2179 * probably a good thing to do for other buf types also.
@@ -2134,7 +2211,16 @@ xlog_recover_do_reg_buffer(
2134 /* Shouldn't be any more regions */ 2211 /* Shouldn't be any more regions */
2135 ASSERT(i == item->ri_total); 2212 ASSERT(i == item->ri_total);
2136 2213
2137 xlog_recovery_validate_buf_type(mp, bp, buf_f); 2214 /*
2215 * We can only do post recovery validation on items on CRC enabled
2216 * fielsystems as we need to know when the buffer was written to be able
2217 * to determine if we should have replayed the item. If we replay old
2218 * metadata over a newer buffer, then it will enter a temporarily
2219 * inconsistent state resulting in verification failures. Hence for now
2220 * just avoid the verification stage for non-crc filesystems
2221 */
2222 if (xfs_sb_version_hascrc(&mp->m_sb))
2223 xlog_recovery_validate_buf_type(mp, bp, buf_f);
2138} 2224}
2139 2225
2140/* 2226/*
@@ -2255,6 +2341,12 @@ xfs_qm_dqcheck(
2255 d->dd_diskdq.d_flags = type; 2341 d->dd_diskdq.d_flags = type;
2256 d->dd_diskdq.d_id = cpu_to_be32(id); 2342 d->dd_diskdq.d_id = cpu_to_be32(id);
2257 2343
2344 if (xfs_sb_version_hascrc(&mp->m_sb)) {
2345 uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
2346 xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
2347 XFS_DQUOT_CRC_OFF);
2348 }
2349
2258 return errs; 2350 return errs;
2259} 2351}
2260 2352
@@ -2782,6 +2874,10 @@ xlog_recover_dquot_pass2(
2782 } 2874 }
2783 2875
2784 memcpy(ddq, recddq, item->ri_buf[1].i_len); 2876 memcpy(ddq, recddq, item->ri_buf[1].i_len);
2877 if (xfs_sb_version_hascrc(&mp->m_sb)) {
2878 xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
2879 XFS_DQUOT_CRC_OFF);
2880 }
2785 2881
2786 ASSERT(dq_f->qlf_size == 2); 2882 ASSERT(dq_f->qlf_size == 2);
2787 ASSERT(bp->b_target->bt_mount == mp); 2883 ASSERT(bp->b_target->bt_mount == mp);