aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_log_recover.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_log_recover.c')
-rw-r--r--fs/xfs/xfs_log_recover.c127
1 files changed, 120 insertions, 7 deletions
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 7cf5e4eafe28..7681b19aa5dc 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -45,6 +45,7 @@
45#include "xfs_cksum.h" 45#include "xfs_cksum.h"
46#include "xfs_trace.h" 46#include "xfs_trace.h"
47#include "xfs_icache.h" 47#include "xfs_icache.h"
48#include "xfs_icreate_item.h"
48 49
49/* Need all the magic numbers and buffer ops structures from these headers */ 50/* Need all the magic numbers and buffer ops structures from these headers */
50#include "xfs_symlink.h" 51#include "xfs_symlink.h"
@@ -1617,7 +1618,10 @@ xlog_recover_add_to_trans(
1617 * form the cancelled buffer table. Hence they have to be done last. 1618 * form the cancelled buffer table. Hence they have to be done last.
1618 * 1619 *
1619 * 3. Inode allocation buffers must be replayed before inode items that 1620 * 3. Inode allocation buffers must be replayed before inode items that
1620 * read the buffer and replay changes into it. 1621 * read the buffer and replay changes into it. For filesystems using the
1622 * ICREATE transactions, this means XFS_LI_ICREATE objects need to get
1623 * treated the same as inode allocation buffers as they create and
1624 * initialise the buffers directly.
1621 * 1625 *
1622 * 4. Inode unlink buffers must be replayed after inode items are replayed. 1626 * 4. Inode unlink buffers must be replayed after inode items are replayed.
1623 * This ensures that inodes are completely flushed to the inode buffer 1627 * This ensures that inodes are completely flushed to the inode buffer
@@ -1632,10 +1636,17 @@ xlog_recover_add_to_trans(
1632 * from all the other buffers and move them to last. 1636 * from all the other buffers and move them to last.
1633 * 1637 *
1634 * Hence, 4 lists, in order from head to tail: 1638 * Hence, 4 lists, in order from head to tail:
1635 * - buffer_list for all buffers except cancelled/inode unlink buffers 1639 * - buffer_list for all buffers except cancelled/inode unlink buffers
1636 * - item_list for all non-buffer items 1640 * - item_list for all non-buffer items
1637 * - inode_buffer_list for inode unlink buffers 1641 * - inode_buffer_list for inode unlink buffers
1638 * - cancel_list for the cancelled buffers 1642 * - cancel_list for the cancelled buffers
1643 *
1644 * Note that we add objects to the tail of the lists so that first-to-last
1645 * ordering is preserved within the lists. Adding objects to the head of the
1646 * list means when we traverse from the head we walk them in last-to-first
1647 * order. For cancelled buffers and inode unlink buffers this doesn't matter,
1648 * but for all other items there may be specific ordering that we need to
1649 * preserve.
1639 */ 1650 */
1640STATIC int 1651STATIC int
1641xlog_recover_reorder_trans( 1652xlog_recover_reorder_trans(
@@ -1655,6 +1666,9 @@ xlog_recover_reorder_trans(
1655 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; 1666 xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
1656 1667
1657 switch (ITEM_TYPE(item)) { 1668 switch (ITEM_TYPE(item)) {
1669 case XFS_LI_ICREATE:
1670 list_move_tail(&item->ri_list, &buffer_list);
1671 break;
1658 case XFS_LI_BUF: 1672 case XFS_LI_BUF:
1659 if (buf_f->blf_flags & XFS_BLF_CANCEL) { 1673 if (buf_f->blf_flags & XFS_BLF_CANCEL) {
1660 trace_xfs_log_recover_item_reorder_head(log, 1674 trace_xfs_log_recover_item_reorder_head(log,
@@ -2578,8 +2592,16 @@ xlog_recover_inode_pass2(
2578 goto error; 2592 goto error;
2579 } 2593 }
2580 2594
2581 /* Skip replay when the on disk inode is newer than the log one */ 2595 /*
2582 if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { 2596 * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
2597 * are transactional and if ordering is necessary we can determine that
2598 * more accurately by the LSN field in the V3 inode core. Don't trust
2599 * the inode versions, as we might be changing them here - use the
2600 * superblock flag to determine whether we need to look at di_flushiter
2601 * to skip replay when the on disk inode is newer than the log one.
2602 */
2603 if (!xfs_sb_version_hascrc(&mp->m_sb) &&
2604 dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
2583 /* 2605 /*
2584 * Deal with the wrap case, DI_MAX_FLUSH is less 2606 * Deal with the wrap case, DI_MAX_FLUSH is less
2585 * than smaller numbers 2607 * than smaller numbers
@@ -2594,6 +2616,7 @@ xlog_recover_inode_pass2(
2594 goto error; 2616 goto error;
2595 } 2617 }
2596 } 2618 }
2619
2597 /* Take the opportunity to reset the flush iteration count */ 2620 /* Take the opportunity to reset the flush iteration count */
2598 dicp->di_flushiter = 0; 2621 dicp->di_flushiter = 0;
2599 2622
@@ -2982,6 +3005,93 @@ xlog_recover_efd_pass2(
2982} 3005}
2983 3006
2984/* 3007/*
3008 * This routine is called when an inode create format structure is found in a
3009 * committed transaction in the log. It's purpose is to initialise the inodes
3010 * being allocated on disk. This requires us to get inode cluster buffers that
3011 * match the range to be intialised, stamped with inode templates and written
3012 * by delayed write so that subsequent modifications will hit the cached buffer
3013 * and only need writing out at the end of recovery.
3014 */
3015STATIC int
3016xlog_recover_do_icreate_pass2(
3017 struct xlog *log,
3018 struct list_head *buffer_list,
3019 xlog_recover_item_t *item)
3020{
3021 struct xfs_mount *mp = log->l_mp;
3022 struct xfs_icreate_log *icl;
3023 xfs_agnumber_t agno;
3024 xfs_agblock_t agbno;
3025 unsigned int count;
3026 unsigned int isize;
3027 xfs_agblock_t length;
3028
3029 icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr;
3030 if (icl->icl_type != XFS_LI_ICREATE) {
3031 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type");
3032 return EINVAL;
3033 }
3034
3035 if (icl->icl_size != 1) {
3036 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size");
3037 return EINVAL;
3038 }
3039
3040 agno = be32_to_cpu(icl->icl_ag);
3041 if (agno >= mp->m_sb.sb_agcount) {
3042 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno");
3043 return EINVAL;
3044 }
3045 agbno = be32_to_cpu(icl->icl_agbno);
3046 if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) {
3047 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno");
3048 return EINVAL;
3049 }
3050 isize = be32_to_cpu(icl->icl_isize);
3051 if (isize != mp->m_sb.sb_inodesize) {
3052 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize");
3053 return EINVAL;
3054 }
3055 count = be32_to_cpu(icl->icl_count);
3056 if (!count) {
3057 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count");
3058 return EINVAL;
3059 }
3060 length = be32_to_cpu(icl->icl_length);
3061 if (!length || length >= mp->m_sb.sb_agblocks) {
3062 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length");
3063 return EINVAL;
3064 }
3065
3066 /* existing allocation is fixed value */
3067 ASSERT(count == XFS_IALLOC_INODES(mp));
3068 ASSERT(length == XFS_IALLOC_BLOCKS(mp));
3069 if (count != XFS_IALLOC_INODES(mp) ||
3070 length != XFS_IALLOC_BLOCKS(mp)) {
3071 xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2");
3072 return EINVAL;
3073 }
3074
3075 /*
3076 * Inode buffers can be freed. Do not replay the inode initialisation as
3077 * we could be overwriting something written after this inode buffer was
3078 * cancelled.
3079 *
3080 * XXX: we need to iterate all buffers and only init those that are not
3081 * cancelled. I think that a more fine grained factoring of
3082 * xfs_ialloc_inode_init may be appropriate here to enable this to be
3083 * done easily.
3084 */
3085 if (xlog_check_buffer_cancelled(log,
3086 XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0))
3087 return 0;
3088
3089 xfs_ialloc_inode_init(mp, NULL, buffer_list, agno, agbno, length,
3090 be32_to_cpu(icl->icl_gen));
3091 return 0;
3092}
3093
3094/*
2985 * Free up any resources allocated by the transaction 3095 * Free up any resources allocated by the transaction
2986 * 3096 *
2987 * Remember that EFIs, EFDs, and IUNLINKs are handled later. 3097 * Remember that EFIs, EFDs, and IUNLINKs are handled later.
@@ -3023,6 +3133,7 @@ xlog_recover_commit_pass1(
3023 case XFS_LI_EFI: 3133 case XFS_LI_EFI:
3024 case XFS_LI_EFD: 3134 case XFS_LI_EFD:
3025 case XFS_LI_DQUOT: 3135 case XFS_LI_DQUOT:
3136 case XFS_LI_ICREATE:
3026 /* nothing to do in pass 1 */ 3137 /* nothing to do in pass 1 */
3027 return 0; 3138 return 0;
3028 default: 3139 default:
@@ -3053,6 +3164,8 @@ xlog_recover_commit_pass2(
3053 return xlog_recover_efd_pass2(log, item); 3164 return xlog_recover_efd_pass2(log, item);
3054 case XFS_LI_DQUOT: 3165 case XFS_LI_DQUOT:
3055 return xlog_recover_dquot_pass2(log, buffer_list, item); 3166 return xlog_recover_dquot_pass2(log, buffer_list, item);
3167 case XFS_LI_ICREATE:
3168 return xlog_recover_do_icreate_pass2(log, buffer_list, item);
3056 case XFS_LI_QUOTAOFF: 3169 case XFS_LI_QUOTAOFF:
3057 /* nothing to do in pass2 */ 3170 /* nothing to do in pass2 */
3058 return 0; 3171 return 0;