Diffstat (limited to 'fs/xfs/xfs_log_recover.c'):
 fs/xfs/xfs_log_recover.c | 95
 1 file changed, 88 insertions(+), 7 deletions(-)
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 93f03ec17eec..45a85ff84da1 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1599,10 +1599,43 @@ xlog_recover_add_to_trans(
 }
 
 /*
- * Sort the log items in the transaction. Cancelled buffers need
- * to be put first so they are processed before any items that might
- * modify the buffers. If they are cancelled, then the modifications
- * don't need to be replayed.
+ * Sort the log items in the transaction.
+ *
+ * The ordering constraints are defined by the inode allocation and unlink
+ * behaviour. The rules are:
+ *
+ * 1. Every item is only logged once in a given transaction. Hence it
+ *    represents the last logged state of the item. Hence ordering is
+ *    dependent on the order in which operations need to be performed so
+ *    required initial conditions are always met.
+ *
+ * 2. Cancelled buffers are recorded in pass 1 in a separate table and
+ *    there's nothing to replay from them so we can simply cull them
+ *    from the transaction. However, we can't do that until after we've
+ *    replayed all the other items because they may be dependent on the
+ *    cancelled buffer and replaying the cancelled buffer can remove it
+ *    from the cancelled buffer table. Hence they have to be done last.
+ *
+ * 3. Inode allocation buffers must be replayed before inode items that
+ *    read the buffer and replay changes into it.
+ *
+ * 4. Inode unlink buffers must be replayed after inode items are replayed.
+ *    This ensures that inodes are completely flushed to the inode buffer
+ *    in a "free" state before we remove the unlinked inode list pointer.
+ *
+ * Hence the ordering needs to be inode allocation buffers first, inode items
+ * second, inode unlink buffers third and cancelled buffers last.
+ *
+ * But there's a problem with that - we can't tell an inode allocation buffer
+ * apart from a regular buffer, so we can't separate them. We can, however,
+ * tell an inode unlink buffer from the others, and so we can separate them out
+ * from all the other buffers and move them to last.
+ *
+ * Hence, 4 lists, in order from head to tail:
+ *	- buffer_list for all buffers except cancelled/inode unlink buffers
+ *	- item_list for all non-buffer items
+ *	- inode_buffer_list for inode unlink buffers
+ *	- cancel_list for the cancelled buffers
  */
 STATIC int
 xlog_recover_reorder_trans(
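To make the ordering rules concrete with a hypothetical example: if a single transaction happened to contain an inode cluster (allocation) buffer, a couple of logged inode cores, an inode unlink buffer and a buffer that was later cancelled, replay would have to touch them in exactly that order - the cluster buffer first so the inodes it initialises exist on disk, the inode cores next, the unlink buffer after the inodes have been written back in their final state, and the cancelled buffer last so that culling it from the cancel table cannot strand another item that still depends on it. The actual item mix in any real transaction varies; this is purely an illustration of rules 1-4 above.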
@@ -1612,6 +1645,10 @@ xlog_recover_reorder_trans(
 {
 	xlog_recover_item_t	*item, *n;
 	LIST_HEAD(sort_list);
+	LIST_HEAD(cancel_list);
+	LIST_HEAD(buffer_list);
+	LIST_HEAD(inode_buffer_list);
+	LIST_HEAD(inode_list);
 
 	list_splice_init(&trans->r_itemq, &sort_list);
 	list_for_each_entry_safe(item, n, &sort_list, ri_list) {
@@ -1619,12 +1656,18 @@ xlog_recover_reorder_trans(
 
 		switch (ITEM_TYPE(item)) {
 		case XFS_LI_BUF:
-			if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) {
+			if (buf_f->blf_flags & XFS_BLF_CANCEL) {
 				trace_xfs_log_recover_item_reorder_head(log,
 						trans, item, pass);
-				list_move(&item->ri_list, &trans->r_itemq);
+				list_move(&item->ri_list, &cancel_list);
 				break;
 			}
+			if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
+				list_move(&item->ri_list, &inode_buffer_list);
+				break;
+			}
+			list_move_tail(&item->ri_list, &buffer_list);
+			break;
 		case XFS_LI_INODE:
 		case XFS_LI_DQUOT:
 		case XFS_LI_QUOTAOFF:
@@ -1632,7 +1675,7 @@ xlog_recover_reorder_trans(
 		case XFS_LI_EFI:
 			trace_xfs_log_recover_item_reorder_tail(log,
 						trans, item, pass);
-			list_move_tail(&item->ri_list, &trans->r_itemq);
+			list_move_tail(&item->ri_list, &inode_list);
 			break;
 		default:
 			xfs_warn(log->l_mp,
@@ -1643,6 +1686,14 @@ xlog_recover_reorder_trans(
 		}
 	}
 	ASSERT(list_empty(&sort_list));
+	if (!list_empty(&buffer_list))
+		list_splice(&buffer_list, &trans->r_itemq);
+	if (!list_empty(&inode_list))
+		list_splice_tail(&inode_list, &trans->r_itemq);
+	if (!list_empty(&inode_buffer_list))
+		list_splice_tail(&inode_buffer_list, &trans->r_itemq);
+	if (!list_empty(&cancel_list))
+		list_splice_tail(&cancel_list, &trans->r_itemq);
 	return 0;
 }
 
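The net effect of the splices above: r_itemq has just been emptied into sort_list, so list_splice() puts the regular buffers at the front and the three list_splice_tail() calls append the inode items, inode unlink buffers and cancelled buffers behind them, giving exactly the order the new comment asks for. The sketch below mimics that two-pass bucket-and-reassemble scheme in user space with a deliberately simplified, hypothetical list type (it is not the kernel's <linux/list.h> API, and the item names are made up):

	#include <stdio.h>

	enum demo_type { DEMO_BUF, DEMO_CANCEL_BUF, DEMO_INODE_BUF, DEMO_INODE };

	struct demo_item {
		const char		*name;
		enum demo_type		 type;
		struct demo_item	*next;	/* singly linked is enough here */
	};

	/* Append one item to the tail of a NULL-terminated list. */
	static void demo_add_tail(struct demo_item **head, struct demo_item *item)
	{
		while (*head)
			head = &(*head)->next;
		item->next = NULL;
		*head = item;
	}

	/* Append a whole list to the tail of another (the "splice_tail" step). */
	static void demo_splice_tail(struct demo_item **head, struct demo_item *list)
	{
		while (*head)
			head = &(*head)->next;
		*head = list;
	}

	int main(void)
	{
		/* A made-up transaction: items appear in arbitrary logged order. */
		struct demo_item items[] = {
			{ "inode core A",		DEMO_INODE },
			{ "cancelled dir buffer",	DEMO_CANCEL_BUF },
			{ "inode unlink buffer",	DEMO_INODE_BUF },
			{ "inode cluster buffer",	DEMO_BUF },
			{ "inode core B",		DEMO_INODE },
		};
		struct demo_item *buffers = NULL, *inodes = NULL;
		struct demo_item *inode_bufs = NULL, *cancelled = NULL;
		struct demo_item *itemq = NULL;

		/* Pass 1: bucket every item, as the switch statement does. */
		for (unsigned int i = 0; i < sizeof(items) / sizeof(items[0]); i++) {
			switch (items[i].type) {
			case DEMO_CANCEL_BUF:
				demo_add_tail(&cancelled, &items[i]);
				break;
			case DEMO_INODE_BUF:
				demo_add_tail(&inode_bufs, &items[i]);
				break;
			case DEMO_BUF:
				demo_add_tail(&buffers, &items[i]);
				break;
			case DEMO_INODE:
				demo_add_tail(&inodes, &items[i]);
				break;
			}
		}

		/* Pass 2: reassemble in replay order, mirroring the splices. */
		demo_splice_tail(&itemq, buffers);	/* buffers first	*/
		demo_splice_tail(&itemq, inodes);	/* then inode items	*/
		demo_splice_tail(&itemq, inode_bufs);	/* then unlink buffers	*/
		demo_splice_tail(&itemq, cancelled);	/* cancelled last	*/

		for (struct demo_item *it = itemq; it; it = it->next)
			printf("%s\n", it->name);
		return 0;
	}

Compiled and run, it prints the inode cluster buffer first and the cancelled buffer last, regardless of the order the items were logged in.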
@@ -1861,6 +1912,15 @@ xlog_recover_do_inode_buffer(
 		buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,
 					      next_unlinked_offset);
 		*buffer_nextp = *logged_nextp;
+
+		/*
+		 * If necessary, recalculate the CRC in the on-disk inode. We
+		 * have to leave the inode in a consistent state for whoever
+		 * reads it next....
+		 */
+		xfs_dinode_calc_crc(mp, (struct xfs_dinode *)
+				xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
+
 	}
 
 	return 0;
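For context on the new xfs_dinode_calc_crc() call: on CRC-enabled (v5 superblock) filesystems each on-disk inode carries a crc32c over its contents, so once recovery has rewritten the unlinked list pointer directly in the buffer the stored checksum no longer matches and has to be recomputed before anything reads the inode again. The sketch below shows the general "recompute an embedded crc32c" pattern in a self-contained user-space form; it is an illustration of the idea, not the kernel implementation - the bitwise crc32c() stands in for the kernel's crc32c(), and the 64-byte block and CRC offset of 8 are arbitrary demo values:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* Bitwise crc32c (Castagnoli, reflected), stand-in for the kernel's. */
	static uint32_t crc32c(uint32_t crc, const void *buf, size_t len)
	{
		const uint8_t *p = buf;

		while (len--) {
			crc ^= *p++;
			for (int i = 0; i < 8; i++)
				crc = (crc >> 1) ^ (0x82F63B78 & -(crc & 1));
		}
		return crc;
	}

	/* Recompute the checksum stored at cksum_off within a metadata block. */
	static void demo_update_cksum(void *block, size_t len, size_t cksum_off)
	{
		char *p = block;
		uint32_t zero = 0, crc = ~0u;

		/*
		 * Checksum the bytes before the CRC field, a zeroed CRC field,
		 * and then the bytes after it, so the stored CRC never
		 * checksums itself.
		 */
		crc = crc32c(crc, p, cksum_off);
		crc = crc32c(crc, &zero, sizeof(zero));
		crc = crc32c(crc, p + cksum_off + sizeof(zero),
			     len - cksum_off - sizeof(zero));
		crc = ~crc;

		/* The kernel stores this in a fixed on-disk byte order; host
		 * order is fine for a demo. */
		memcpy(p + cksum_off, &crc, sizeof(crc));
	}

	int main(void)
	{
		unsigned char inode[64] = { 0 };	/* pretend on-disk inode */

		inode[0] = 0x42;			/* some metadata content */
		demo_update_cksum(inode, sizeof(inode), 8);
		printf("stored crc: %02x %02x %02x %02x\n",
		       inode[8], inode[9], inode[10], inode[11]);
		return 0;
	}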
@@ -2097,6 +2157,17 @@ xlog_recover_do_reg_buffer(
 			((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
 
 		/*
+		 * The dirty regions logged in the buffer, even though
+		 * contiguous, may span multiple chunks. This is because the
+		 * dirty region may span a physical page boundary in a buffer
+		 * and hence be split into two separate vectors for writing into
+		 * the log. Hence we need to trim nbits back to the length of
+		 * the current region being copied out of the log.
+		 */
+		if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT))
+			nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT;
+
+		/*
 		 * Do a sanity check if this is a dquot buffer. Just checking
 		 * the first dquot in the buffer should do. XXXThis is
 		 * probably a good thing to do for other buf types also.
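A worked example of the trim may help. The dirty bitmap describes the buffer in fixed-size chunks of (1 << XFS_BLF_SHIFT) bytes, so a run of nbits set bits normally means "copy nbits chunks out of this log vector". When the dirty run was split across two vectors at a page boundary, the current vector carries fewer bytes than the bitmap run implies, and nbits has to be clamped to what i_len actually holds. The snippet below just replays that arithmetic with made-up numbers; DEMO_BLF_SHIFT of 7 (128-byte chunks) is an assumed stand-in for the real XFS_BLF_SHIFT:

	#include <stdio.h>

	#define DEMO_BLF_SHIFT	7	/* 128-byte dirty-bitmap chunks (assumed) */

	int main(void)
	{
		int nbits = 8;	/* bitmap says 8 chunks = 1024 dirty bytes	*/
		int i_len = 640;/* but this log vector only carries 640 bytes	*/

		/* Trim the copy length back to what this vector actually holds. */
		if (i_len < (nbits << DEMO_BLF_SHIFT))
			nbits = i_len >> DEMO_BLF_SHIFT;

		printf("copying %d chunks (%d bytes) from this vector\n",
		       nbits, nbits << DEMO_BLF_SHIFT);
		return 0;
	}

With these numbers the copy is trimmed from 8 chunks down to 5 (640 bytes), matching the data actually present in item->ri_buf[i].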
@@ -2255,6 +2326,12 @@ xfs_qm_dqcheck(
2255 d->dd_diskdq.d_flags = type; 2326 d->dd_diskdq.d_flags = type;
2256 d->dd_diskdq.d_id = cpu_to_be32(id); 2327 d->dd_diskdq.d_id = cpu_to_be32(id);
2257 2328
2329 if (xfs_sb_version_hascrc(&mp->m_sb)) {
2330 uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
2331 xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
2332 XFS_DQUOT_CRC_OFF);
2333 }
2334
2258 return errs; 2335 return errs;
2259} 2336}
2260 2337
@@ -2782,6 +2859,10 @@ xlog_recover_dquot_pass2(
 	}
 
 	memcpy(ddq, recddq, item->ri_buf[1].i_len);
+	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+		xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
+				 XFS_DQUOT_CRC_OFF);
+	}
 
 	ASSERT(dq_f->qlf_size == 2);
 	ASSERT(bp->b_target->bt_mount == mp);