aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_log_recover.c
diff options
context:
space:
mode:
author: Brian Foster <bfoster@redhat.com> 2015-08-18 19:59:38 -0400
committer: Dave Chinner <david@fromorbit.com> 2015-08-18 19:59:38 -0400
commit: fc0d1656964fc53fca84549df5a6bd4a16a29cdf (patch)
tree: 80a9c094299454c7b7ec655b9d0fd43a0ab381df /fs/xfs/xfs_log_recover.c
parent: 78d57e4593bf700e1a4447e3a7769da8dd0e0844 (diff)
xfs: fix broken icreate log item cancellation
Inode cluster buffers are invalidated and cancelled when inode chunks are freed to notify log recovery that previous logged updates to the metadata buffer should be skipped. This ensures that log recovery does not overwrite buffers that might have already been reused. On v4 filesystems, inode chunk allocation and inode updates are logged via the cluster buffers and thus cancellation is easily detected via buffer cancellation items. v5 filesystems use the new icreate transaction, which uses logical logging and ordered buffers to log a full inode chunk allocation at once. The resulting icreate item often spans multiple inode cluster buffers. Log recovery checks for cancelled buffers when processing icreate log items, but it has a couple of problems. First, it uses the full length of the inode chunk rather than the cluster size. Second, it uses the length in FSB units rather than BB units. Either of these problems prevents icreate recovery from identifying cancelled buffers and thus inode initialization proceeds unconditionally. Update xlog_recover_do_icreate_pass2() to iterate the icreate range in cluster sized increments and check each increment for cancellation. Since icreate is currently only used for the minimum atomic inode chunk allocation, we expect that either all or none of the buffers will be cancelled. Cancel the icreate if at least one buffer is cancelled to avoid making a bad situation worse by initializing a partial inode chunk, but detect such anomalies and warn the user. Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs/xfs_log_recover.c')
-rw-r--r-- fs/xfs/xfs_log_recover.c | 49
1 files changed, 37 insertions, 12 deletions
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 0c6641b39e2a..2fa55e1c2b73 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3032,6 +3032,11 @@ xlog_recover_do_icreate_pass2(
3032 unsigned int count; 3032 unsigned int count;
3033 unsigned int isize; 3033 unsigned int isize;
3034 xfs_agblock_t length; 3034 xfs_agblock_t length;
3035 int blks_per_cluster;
3036 int bb_per_cluster;
3037 int cancel_count;
3038 int nbufs;
3039 int i;
3035 3040
3036 icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr; 3041 icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr;
3037 if (icl->icl_type != XFS_LI_ICREATE) { 3042 if (icl->icl_type != XFS_LI_ICREATE) {
@@ -3090,25 +3095,45 @@ xlog_recover_do_icreate_pass2(
3090 } 3095 }
3091 3096
3092 /* 3097 /*
3093 * Inode buffers can be freed. Do not replay the inode initialisation as 3098 * The icreate transaction can cover multiple cluster buffers and these
3094 * we could be overwriting something written after this inode buffer was 3099 * buffers could have been freed and reused. Check the individual
3095 * cancelled. 3100 * buffers for cancellation so we don't overwrite anything written after
3101 * a cancellation.
3102 */
3103 blks_per_cluster = xfs_icluster_size_fsb(mp);
3104 bb_per_cluster = XFS_FSB_TO_BB(mp, blks_per_cluster);
3105 nbufs = length / blks_per_cluster;
3106 for (i = 0, cancel_count = 0; i < nbufs; i++) {
3107 xfs_daddr_t daddr;
3108
3109 daddr = XFS_AGB_TO_DADDR(mp, agno,
3110 agbno + i * blks_per_cluster);
3111 if (xlog_check_buffer_cancelled(log, daddr, bb_per_cluster, 0))
3112 cancel_count++;
3113 }
3114
3115 /*
3116 * We currently only use icreate for a single allocation at a time. This
3117 * means we should expect either all or none of the buffers to be
3118 * cancelled. Be conservative and skip replay if at least one buffer is
3119 * cancelled, but warn the user that something is awry if the buffers
3120 * are not consistent.
3096 * 3121 *
3097 * XXX: we need to iterate all buffers and only init those that are not 3122 * XXX: This must be refined to only skip cancelled clusters once we use
3098 * cancelled. I think that a more fine grained factoring of 3123 * icreate for multiple chunk allocations.
3099 * xfs_ialloc_inode_init may be appropriate here to enable this to be
3100 * done easily.
3101 */ 3124 */
3102 if (xlog_check_buffer_cancelled(log, 3125 ASSERT(!cancel_count || cancel_count == nbufs);
3103 XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0)) { 3126 if (cancel_count) {
3127 if (cancel_count != nbufs)
3128 xfs_warn(mp,
3129 "WARNING: partial inode chunk cancellation, skipped icreate.");
3104 trace_xfs_log_recover_icreate_cancel(log, icl); 3130 trace_xfs_log_recover_icreate_cancel(log, icl);
3105 return 0; 3131 return 0;
3106 } 3132 }
3107 3133
3108 trace_xfs_log_recover_icreate_recover(log, icl); 3134 trace_xfs_log_recover_icreate_recover(log, icl);
3109 xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno, length, 3135 return xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno,
3110 be32_to_cpu(icl->icl_gen)); 3136 length, be32_to_cpu(icl->icl_gen));
3111 return 0;
3112} 3137}
3113 3138
3114STATIC void 3139STATIC void