diff options
Diffstat (limited to 'fs/xfs/xfs_log_recover.c')
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 127 |
1 files changed, 120 insertions, 7 deletions
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 7cf5e4eafe28..7681b19aa5dc 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include "xfs_cksum.h" | 45 | #include "xfs_cksum.h" |
46 | #include "xfs_trace.h" | 46 | #include "xfs_trace.h" |
47 | #include "xfs_icache.h" | 47 | #include "xfs_icache.h" |
48 | #include "xfs_icreate_item.h" | ||
48 | 49 | ||
49 | /* Need all the magic numbers and buffer ops structures from these headers */ | 50 | /* Need all the magic numbers and buffer ops structures from these headers */ |
50 | #include "xfs_symlink.h" | 51 | #include "xfs_symlink.h" |
@@ -1617,7 +1618,10 @@ xlog_recover_add_to_trans( | |||
1617 | * form the cancelled buffer table. Hence they have to be done last. | 1618 | * form the cancelled buffer table. Hence they have to be done last. |
1618 | * | 1619 | * |
1619 | * 3. Inode allocation buffers must be replayed before inode items that | 1620 | * 3. Inode allocation buffers must be replayed before inode items that |
1620 | * read the buffer and replay changes into it. | 1621 | * read the buffer and replay changes into it. For filesystems using the |
1622 | * ICREATE transactions, this means XFS_LI_ICREATE objects need to get | ||
1623 | * treated the same as inode allocation buffers as they create and | ||
1624 | * initialise the buffers directly. | ||
1621 | * | 1625 | * |
1622 | * 4. Inode unlink buffers must be replayed after inode items are replayed. | 1626 | * 4. Inode unlink buffers must be replayed after inode items are replayed. |
1623 | * This ensures that inodes are completely flushed to the inode buffer | 1627 | * This ensures that inodes are completely flushed to the inode buffer |
@@ -1632,10 +1636,17 @@ xlog_recover_add_to_trans( | |||
1632 | * from all the other buffers and move them to last. | 1636 | * from all the other buffers and move them to last. |
1633 | * | 1637 | * |
1634 | * Hence, 4 lists, in order from head to tail: | 1638 | * Hence, 4 lists, in order from head to tail: |
1635 | * - buffer_list for all buffers except cancelled/inode unlink buffers | 1639 | * - buffer_list for all buffers except cancelled/inode unlink buffers |
1636 | * - item_list for all non-buffer items | 1640 | * - item_list for all non-buffer items |
1637 | * - inode_buffer_list for inode unlink buffers | 1641 | * - inode_buffer_list for inode unlink buffers |
1638 | * - cancel_list for the cancelled buffers | 1642 | * - cancel_list for the cancelled buffers |
1643 | * | ||
1644 | * Note that we add objects to the tail of the lists so that first-to-last | ||
1645 | * ordering is preserved within the lists. Adding objects to the head of the | ||
1646 | * list means when we traverse from the head we walk them in last-to-first | ||
1647 | * order. For cancelled buffers and inode unlink buffers this doesn't matter, | ||
1648 | * but for all other items there may be specific ordering that we need to | ||
1649 | * preserve. | ||
1639 | */ | 1650 | */ |
1640 | STATIC int | 1651 | STATIC int |
1641 | xlog_recover_reorder_trans( | 1652 | xlog_recover_reorder_trans( |
@@ -1655,6 +1666,9 @@ xlog_recover_reorder_trans( | |||
1655 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; | 1666 | xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr; |
1656 | 1667 | ||
1657 | switch (ITEM_TYPE(item)) { | 1668 | switch (ITEM_TYPE(item)) { |
1669 | case XFS_LI_ICREATE: | ||
1670 | list_move_tail(&item->ri_list, &buffer_list); | ||
1671 | break; | ||
1658 | case XFS_LI_BUF: | 1672 | case XFS_LI_BUF: |
1659 | if (buf_f->blf_flags & XFS_BLF_CANCEL) { | 1673 | if (buf_f->blf_flags & XFS_BLF_CANCEL) { |
1660 | trace_xfs_log_recover_item_reorder_head(log, | 1674 | trace_xfs_log_recover_item_reorder_head(log, |
@@ -2578,8 +2592,16 @@ xlog_recover_inode_pass2( | |||
2578 | goto error; | 2592 | goto error; |
2579 | } | 2593 | } |
2580 | 2594 | ||
2581 | /* Skip replay when the on disk inode is newer than the log one */ | 2595 | /* |
2582 | if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { | 2596 | * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes |
2597 | * are transactional and if ordering is necessary we can determine that | ||
2598 | * more accurately by the LSN field in the V3 inode core. Don't trust | ||
2599 | * the inode versions as we might be changing them here - use the | ||
2600 | * superblock flag to determine whether we need to look at di_flushiter | ||
2601 | * to skip replay when the on disk inode is newer than the log one | ||
2602 | */ | ||
2603 | if (!xfs_sb_version_hascrc(&mp->m_sb) && | ||
2604 | dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { | ||
2583 | /* | 2605 | /* |
2584 | * Deal with the wrap case, DI_MAX_FLUSH is less | 2606 | * Deal with the wrap case, DI_MAX_FLUSH is less |
2585 | * than smaller numbers | 2607 | * than smaller numbers |
@@ -2594,6 +2616,7 @@ xlog_recover_inode_pass2( | |||
2594 | goto error; | 2616 | goto error; |
2595 | } | 2617 | } |
2596 | } | 2618 | } |
2619 | |||
2597 | /* Take the opportunity to reset the flush iteration count */ | 2620 | /* Take the opportunity to reset the flush iteration count */ |
2598 | dicp->di_flushiter = 0; | 2621 | dicp->di_flushiter = 0; |
2599 | 2622 | ||
@@ -2982,6 +3005,93 @@ xlog_recover_efd_pass2( | |||
2982 | } | 3005 | } |
2983 | 3006 | ||
2984 | /* | 3007 | /* |
3008 | * This routine is called when an inode create format structure is found in a | ||
3009 | * committed transaction in the log. Its purpose is to initialise the inodes | ||
3010 | * being allocated on disk. This requires us to get inode cluster buffers that | ||
3011 | * match the range to be initialised, stamped with inode templates and written | ||
3012 | * by delayed write so that subsequent modifications will hit the cached buffer | ||
3013 | * and only need writing out at the end of recovery. Returns EINVAL if any logged field fails validation; returns 0 once the range has been initialised, or when it is skipped because xlog_check_buffer_cancelled() reports it cancelled. | ||
3014 | */ | ||
3015 | STATIC int | ||
3016 | xlog_recover_do_icreate_pass2( | ||
3017 | struct xlog *log, | ||
3018 | struct list_head *buffer_list, | ||
3019 | xlog_recover_item_t *item) | ||
3020 | { | ||
3021 | struct xfs_mount *mp = log->l_mp; | ||
3022 | struct xfs_icreate_log *icl; | ||
3023 | xfs_agnumber_t agno; | ||
3024 | xfs_agblock_t agbno; | ||
3025 | unsigned int count; | ||
3026 | unsigned int isize; | ||
3027 | xfs_agblock_t length; | ||
3028 | |||
3029 | icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr; | ||
3030 | if (icl->icl_type != XFS_LI_ICREATE) { | ||
3031 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type"); | ||
3032 | return EINVAL; | ||
3033 | } | ||
3034 | |||
3035 | if (icl->icl_size != 1) { | ||
3036 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size"); | ||
3037 | return EINVAL; | ||
3038 | } | ||
3039 | |||
3040 | agno = be32_to_cpu(icl->icl_ag); | ||
3041 | if (agno >= mp->m_sb.sb_agcount) { | ||
3042 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno"); | ||
3043 | return EINVAL; | ||
3044 | } | ||
3045 | agbno = be32_to_cpu(icl->icl_agbno); | ||
3046 | if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) { | ||
3047 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno"); | ||
3048 | return EINVAL; | ||
3049 | } | ||
3050 | isize = be32_to_cpu(icl->icl_isize); | ||
3051 | if (isize != mp->m_sb.sb_inodesize) { | ||
3052 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize"); | ||
3053 | return EINVAL; | ||
3054 | } | ||
3055 | count = be32_to_cpu(icl->icl_count); | ||
3056 | if (!count) { | ||
3057 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count"); | ||
3058 | return EINVAL; | ||
3059 | } | ||
3060 | length = be32_to_cpu(icl->icl_length); | ||
3061 | if (!length || length >= mp->m_sb.sb_agblocks) { | ||
3062 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length"); | ||
3063 | return EINVAL; | ||
3064 | } | ||
3065 | |||
3066 | /* existing allocation is fixed value */ | ||
3067 | ASSERT(count == XFS_IALLOC_INODES(mp)); | ||
3068 | ASSERT(length == XFS_IALLOC_BLOCKS(mp)); | ||
3069 | if (count != XFS_IALLOC_INODES(mp) || | ||
3070 | length != XFS_IALLOC_BLOCKS(mp)) { | ||
3071 | xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2"); | ||
3072 | return EINVAL; | ||
3073 | } | ||
3074 | |||
3075 | /* | ||
3076 | * Inode buffers can be freed. Do not replay the inode initialisation as | ||
3077 | * we could be overwriting something written after this inode buffer was | ||
3078 | * cancelled. | ||
3079 | * | ||
3080 | * XXX: we need to iterate all buffers and only init those that are not | ||
3081 | * cancelled. I think that a more fine grained factoring of | ||
3082 | * xfs_ialloc_inode_init may be appropriate here to enable this to be | ||
3083 | * done easily. NOTE(review): the result of xfs_ialloc_inode_init() below is discarded - confirm it cannot fail, or that a failure is safe to ignore here. | ||
3084 | */ | ||
3085 | if (xlog_check_buffer_cancelled(log, | ||
3086 | XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0)) | ||
3087 | return 0; | ||
3088 | |||
3089 | xfs_ialloc_inode_init(mp, NULL, buffer_list, agno, agbno, length, | ||
3090 | be32_to_cpu(icl->icl_gen)); | ||
3091 | return 0; | ||
3092 | } | ||
3093 | |||
3094 | /* | ||
2985 | * Free up any resources allocated by the transaction | 3095 | * Free up any resources allocated by the transaction |
2986 | * | 3096 | * |
2987 | * Remember that EFIs, EFDs, and IUNLINKs are handled later. | 3097 | * Remember that EFIs, EFDs, and IUNLINKs are handled later. |
@@ -3023,6 +3133,7 @@ xlog_recover_commit_pass1( | |||
3023 | case XFS_LI_EFI: | 3133 | case XFS_LI_EFI: |
3024 | case XFS_LI_EFD: | 3134 | case XFS_LI_EFD: |
3025 | case XFS_LI_DQUOT: | 3135 | case XFS_LI_DQUOT: |
3136 | case XFS_LI_ICREATE: | ||
3026 | /* nothing to do in pass 1 */ | 3137 | /* nothing to do in pass 1 */ |
3027 | return 0; | 3138 | return 0; |
3028 | default: | 3139 | default: |
@@ -3053,6 +3164,8 @@ xlog_recover_commit_pass2( | |||
3053 | return xlog_recover_efd_pass2(log, item); | 3164 | return xlog_recover_efd_pass2(log, item); |
3054 | case XFS_LI_DQUOT: | 3165 | case XFS_LI_DQUOT: |
3055 | return xlog_recover_dquot_pass2(log, buffer_list, item); | 3166 | return xlog_recover_dquot_pass2(log, buffer_list, item); |
3167 | case XFS_LI_ICREATE: | ||
3168 | return xlog_recover_do_icreate_pass2(log, buffer_list, item); | ||
3056 | case XFS_LI_QUOTAOFF: | 3169 | case XFS_LI_QUOTAOFF: |
3057 | /* nothing to do in pass2 */ | 3170 | /* nothing to do in pass2 */ |
3058 | return 0; | 3171 | return 0; |