author	Linus Torvalds <torvalds@linux-foundation.org>	2016-03-11 13:21:32 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-03-11 13:21:32 -0500
commit	2a62ec0af2ed3758dd4535eb58527f63ba6549ad (patch)
tree	b33a15c9d8e14e369688b169958292ba42200d0e /fs
parent	63cf207e931894b93fe5471131e41fda567611e2 (diff)
parent	7f6aff3a29b08fc4234c8136eb1ac31b4897522c (diff)
Merge tag 'xfs-for-linus-4.5-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs
Pull xfs fixes from Dave Chinner:
 "This is a fix for a regression introduced in 4.5-rc1 by the new torn
  log write detection code. The regression only affects people moving a
  clean filesystem between machines/kernels of different architecture
  (such as changing between 32 bit and 64 bit kernels), but this is the
  recommended (and only!) safe way to migrate a filesystem between
  architectures so we really need to ensure it works.

  The changes are larger than I'd prefer right at the end of the
  release cycle, but the majority of the change is just factoring code
  to enable the detection of a clean log at the correct time to avoid
  this issue.

  Changes:

   - Only perform torn log write detection on dirty logs. This prevents
     failures being detected due to a clean filesystem being moved
     between machines or kernels of different architectures (e.g. 32 ->
     64 bit, BE -> LE, etc). This fixes a regression introduced by the
     torn log write detection in 4.5-rc1"

* tag 'xfs-for-linus-4.5-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs:
  xfs: only run torn log write detection on dirty logs
  xfs: refactor in-core log state update to helper
  xfs: refactor unmount record detection into helper
  xfs: separate log head record discovery from verification
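In short, tail discovery now runs in a fixed order: find the last record
header, set the in-core log state, check for an unmount (clean) record,
and only then run CRC-based torn write verification, and only on a dirty
log. A standalone C sketch of that ordering (the helper names below stand
in for the real xfs_log_recover.c helpers and these stub bodies are
invented for illustration only):

	#include <stdbool.h>
	#include <stdio.h>

	/* stubs standing in for the real helpers in xfs_log_recover.c */
	static bool check_unmount_rec(void)	{ return true; }	/* log clean? */
	static int  verify_head(void)		{ return 0; }		/* CRC checks */

	int main(void)
	{
		bool	clean = check_unmount_rec();

		/*
		 * The 4.5-rc1 regression: CRC verification ran before the
		 * clean-log check, so a clean log moved between architectures
		 * could spuriously fail. Now a clean log skips it entirely.
		 */
		if (!clean) {
			if (verify_head())
				fprintf(stderr, "torn write detected\n");
		}
		return 0;
	}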
Diffstat (limited to 'fs')
-rw-r--r--	fs/xfs/xfs_log_recover.c	271
 1 file changed, 168 insertions(+), 103 deletions(-)
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 594f7e63b432..be5568839442 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1109,27 +1109,10 @@ xlog_verify_head(
 	bool		tmp_wrapped;
 
 	/*
-	 * Search backwards through the log looking for the log record header
-	 * block. This wraps all the way back around to the head so something is
-	 * seriously wrong if we can't find it.
-	 */
-	found = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, rhead_blk,
-				      rhead, wrapped);
-	if (found < 0)
-		return found;
-	if (!found) {
-		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
-		return -EIO;
-	}
-
-	*tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
-
-	/*
-	 * Now that we have a tail block, check the head of the log for torn
-	 * writes. Search again until we hit the tail or the maximum number of
-	 * log record I/Os that could have been in flight at one time. Use a
-	 * temporary buffer so we don't trash the rhead/bp pointer from the
-	 * call above.
+	 * Check the head of the log for torn writes. Search backwards from the
+	 * head until we hit the tail or the maximum number of log record I/Os
+	 * that could have been in flight at one time. Use a temporary buffer so
+	 * we don't trash the rhead/bp pointers from the caller.
 	 */
 	tmp_bp = xlog_get_bp(log, 1);
 	if (!tmp_bp)
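The rewritten comment above describes a bounded backwards scan: from the
head, walk back at most as many records as could have been in flight at
once, stopping early at the tail. A minimal standalone illustration of
that idea (C with invented names and a fake CRC callback; the real search
is done by xlog_rseek_logrec_hdr() and xlog_verify_head() over on-disk
record headers):

	#include <stdbool.h>
	#include <stdio.h>

	/*
	 * Walk back from head_blk toward tail_blk, at most max_inflight
	 * records, and return the newest block whose record passes CRC.
	 */
	static long trim_torn_head(long head_blk, long tail_blk, int max_inflight,
				   bool (*crc_ok)(long blk))
	{
		long	blk = head_blk;

		for (int i = 0; i < max_inflight && blk > tail_blk; i++, blk--) {
			if (crc_ok(blk))
				break;		/* first intact record: new head */
		}
		return blk;
	}

	static bool fake_crc(long blk)
	{
		return blk <= 97;	/* pretend blocks 98..100 are torn */
	}

	int main(void)
	{
		/* head at 100, tail at 90: the head is trimmed back to 97 */
		printf("%ld\n", trim_torn_head(100, 90, 8, fake_crc));
		return 0;
	}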
@@ -1216,6 +1199,115 @@ xlog_verify_head(
 }
 
 /*
+ * Check whether the head of the log points to an unmount record. In other
+ * words, determine whether the log is clean. If so, update the in-core state
+ * appropriately.
+ */
+static int
+xlog_check_unmount_rec(
+	struct xlog		*log,
+	xfs_daddr_t		*head_blk,
+	xfs_daddr_t		*tail_blk,
+	struct xlog_rec_header	*rhead,
+	xfs_daddr_t		rhead_blk,
+	struct xfs_buf		*bp,
+	bool			*clean)
+{
+	struct xlog_op_header	*op_head;
+	xfs_daddr_t		umount_data_blk;
+	xfs_daddr_t		after_umount_blk;
+	int			hblks;
+	int			error;
+	char			*offset;
+
+	*clean = false;
+
+	/*
+	 * Look for unmount record. If we find it, then we know there was a
+	 * clean unmount. Since 'i' could be the last block in the physical
+	 * log, we convert to a log block before comparing to the head_blk.
+	 *
+	 * Save the current tail lsn to use to pass to xlog_clear_stale_blocks()
+	 * below. We won't want to clear the unmount record if there is one, so
+	 * we pass the lsn of the unmount record rather than the block after it.
+	 */
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+		int	h_size = be32_to_cpu(rhead->h_size);
+		int	h_version = be32_to_cpu(rhead->h_version);
+
+		if ((h_version & XLOG_VERSION_2) &&
+		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
+			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
+			if (h_size % XLOG_HEADER_CYCLE_SIZE)
+				hblks++;
+		} else {
+			hblks = 1;
+		}
+	} else {
+		hblks = 1;
+	}
+	after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
+	after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
+	if (*head_blk == after_umount_blk &&
+	    be32_to_cpu(rhead->h_num_logops) == 1) {
+		umount_data_blk = rhead_blk + hblks;
+		umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
+		error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+		if (error)
+			return error;
+
+		op_head = (struct xlog_op_header *)offset;
+		if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
+			/*
+			 * Set tail and last sync so that newly written log
+			 * records will point recovery to after the current
+			 * unmount record.
+			 */
+			xlog_assign_atomic_lsn(&log->l_tail_lsn,
+					log->l_curr_cycle, after_umount_blk);
+			xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
+					log->l_curr_cycle, after_umount_blk);
+			*tail_blk = after_umount_blk;
+
+			*clean = true;
+		}
+	}
+
+	return 0;
+}
+
+static void
+xlog_set_state(
+	struct xlog		*log,
+	xfs_daddr_t		head_blk,
+	struct xlog_rec_header	*rhead,
+	xfs_daddr_t		rhead_blk,
+	bool			bump_cycle)
+{
+	/*
+	 * Reset log values according to the state of the log when we
+	 * crashed. In the case where head_blk == 0, we bump curr_cycle
+	 * one because the next write starts a new cycle rather than
+	 * continuing the cycle of the last good log record. At this
+	 * point we have guaranteed that all partial log records have been
+	 * accounted for. Therefore, we know that the last good log record
+	 * written was complete and ended exactly on the end boundary
+	 * of the physical log.
+	 */
+	log->l_prev_block = rhead_blk;
+	log->l_curr_block = (int)head_blk;
+	log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
+	if (bump_cycle)
+		log->l_curr_cycle++;
+	atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
+	atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
+	xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
+					BBTOB(log->l_curr_block));
+	xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
+					BBTOB(log->l_curr_block));
+}
+
+/*
  * Find the sync block number or the tail of the log.
  *
  * This will be the block number of the last record to have its
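The hblks computation in the new xlog_check_unmount_rec() accounts for v2
logs whose record header can span more than one basic block: one header
block per XLOG_HEADER_CYCLE_SIZE of h_size, rounded up. A standalone
arithmetic sketch (userspace C; the value 32768 matches the kernel's 32k
definition of XLOG_HEADER_CYCLE_SIZE, but treat it and the sample sizes
as illustrative):

	#include <stdio.h>

	#define XLOG_HEADER_CYCLE_SIZE	32768	/* assumed: 32k, as in the kernel */

	static int hblks_for(int h_size, int log_v2)
	{
		/* mirror the round-up division in xlog_check_unmount_rec() */
		if (log_v2 && h_size > XLOG_HEADER_CYCLE_SIZE)
			return (h_size + XLOG_HEADER_CYCLE_SIZE - 1) /
				XLOG_HEADER_CYCLE_SIZE;
		return 1;
	}

	int main(void)
	{
		/* v1 log, or small v2 header: a single header block */
		printf("%d\n", hblks_for(4096, 0));	/* -> 1 */
		/* 64k v2 header: two header blocks */
		printf("%d\n", hblks_for(65536, 1));	/* -> 2 */
		return 0;
	}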
@@ -1238,22 +1330,20 @@ xlog_find_tail(
 	xfs_daddr_t		*tail_blk)
 {
 	xlog_rec_header_t	*rhead;
-	xlog_op_header_t	*op_head;
 	char			*offset = NULL;
 	xfs_buf_t		*bp;
 	int			error;
-	xfs_daddr_t		umount_data_blk;
-	xfs_daddr_t		after_umount_blk;
 	xfs_daddr_t		rhead_blk;
 	xfs_lsn_t		tail_lsn;
-	int			hblks;
 	bool			wrapped = false;
+	bool			clean = false;
 
 	/*
 	 * Find previous log record
 	 */
 	if ((error = xlog_find_head(log, head_blk)))
 		return error;
+	ASSERT(*head_blk < INT_MAX);
 
 	bp = xlog_get_bp(log, 1);
 	if (!bp)
@@ -1271,100 +1361,75 @@ xlog_find_tail(
 	}
 
 	/*
-	 * Trim the head block back to skip over torn records. We can have
-	 * multiple log I/Os in flight at any time, so we assume CRC failures
-	 * back through the previous several records are torn writes and skip
-	 * them.
+	 * Search backwards through the log looking for the log record header
+	 * block. This wraps all the way back around to the head so something is
+	 * seriously wrong if we can't find it.
 	 */
-	ASSERT(*head_blk < INT_MAX);
-	error = xlog_verify_head(log, head_blk, tail_blk, bp, &rhead_blk,
-				 &rhead, &wrapped);
-	if (error)
-		goto done;
+	error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp,
+				      &rhead_blk, &rhead, &wrapped);
+	if (error < 0)
+		return error;
+	if (!error) {
+		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
+		return -EIO;
+	}
+	*tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
 
 	/*
-	 * Reset log values according to the state of the log when we
-	 * crashed. In the case where head_blk == 0, we bump curr_cycle
-	 * one because the next write starts a new cycle rather than
-	 * continuing the cycle of the last good log record. At this
-	 * point we have guaranteed that all partial log records have been
-	 * accounted for. Therefore, we know that the last good log record
-	 * written was complete and ended exactly on the end boundary
-	 * of the physical log.
+	 * Set the log state based on the current head record.
 	 */
-	log->l_prev_block = rhead_blk;
-	log->l_curr_block = (int)*head_blk;
-	log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
-	if (wrapped)
-		log->l_curr_cycle++;
-	atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
-	atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
-	xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
-					BBTOB(log->l_curr_block));
-	xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
-					BBTOB(log->l_curr_block));
+	xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped);
+	tail_lsn = atomic64_read(&log->l_tail_lsn);
 
 	/*
-	 * Look for unmount record. If we find it, then we know there
-	 * was a clean unmount. Since 'i' could be the last block in
-	 * the physical log, we convert to a log block before comparing
-	 * to the head_blk.
+	 * Look for an unmount record at the head of the log. This sets the log
+	 * state to determine whether recovery is necessary.
+	 */
+	error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead,
+				       rhead_blk, bp, &clean);
+	if (error)
+		goto done;
+
+	/*
+	 * Verify the log head if the log is not clean (e.g., we have anything
+	 * but an unmount record at the head). This uses CRC verification to
+	 * detect and trim torn writes. If discovered, CRC failures are
+	 * considered torn writes and the log head is trimmed accordingly.
 	 *
-	 * Save the current tail lsn to use to pass to
-	 * xlog_clear_stale_blocks() below. We won't want to clear the
-	 * unmount record if there is one, so we pass the lsn of the
-	 * unmount record rather than the block after it.
+	 * Note that we can only run CRC verification when the log is dirty
+	 * because there's no guarantee that the log data behind an unmount
+	 * record is compatible with the current architecture.
 	 */
-	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
-		int	h_size = be32_to_cpu(rhead->h_size);
-		int	h_version = be32_to_cpu(rhead->h_version);
+	if (!clean) {
+		xfs_daddr_t	orig_head = *head_blk;
 
-		if ((h_version & XLOG_VERSION_2) &&
-		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
-			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
-			if (h_size % XLOG_HEADER_CYCLE_SIZE)
-				hblks++;
-		} else {
-			hblks = 1;
-		}
-	} else {
-		hblks = 1;
-	}
-	after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
-	after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
-	tail_lsn = atomic64_read(&log->l_tail_lsn);
-	if (*head_blk == after_umount_blk &&
-	    be32_to_cpu(rhead->h_num_logops) == 1) {
-		umount_data_blk = rhead_blk + hblks;
-		umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
-		error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+		error = xlog_verify_head(log, head_blk, tail_blk, bp,
+					 &rhead_blk, &rhead, &wrapped);
 		if (error)
 			goto done;
 
-		op_head = (xlog_op_header_t *)offset;
-		if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
-			/*
-			 * Set tail and last sync so that newly written
-			 * log records will point recovery to after the
-			 * current unmount record.
-			 */
-			xlog_assign_atomic_lsn(&log->l_tail_lsn,
-					log->l_curr_cycle, after_umount_blk);
-			xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
-					log->l_curr_cycle, after_umount_blk);
-			*tail_blk = after_umount_blk;
-
-			/*
-			 * Note that the unmount was clean. If the unmount
-			 * was not clean, we need to know this to rebuild the
-			 * superblock counters from the perag headers if we
-			 * have a filesystem using non-persistent counters.
-			 */
-			log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
+		/* update in-core state again if the head changed */
+		if (*head_blk != orig_head) {
+			xlog_set_state(log, *head_blk, rhead, rhead_blk,
+				       wrapped);
+			tail_lsn = atomic64_read(&log->l_tail_lsn);
+			error = xlog_check_unmount_rec(log, head_blk, tail_blk,
+						       rhead, rhead_blk, bp,
+						       &clean);
+			if (error)
+				goto done;
 		}
 	}
 
 	/*
+	 * Note that the unmount was clean. If the unmount was not clean, we
+	 * need to know this to rebuild the superblock counters from the perag
+	 * headers if we have a filesystem using non-persistent counters.
+	 */
+	if (clean)
+		log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
+
+	/*
 	 * Make sure that there are no blocks in front of the head
 	 * with the same cycle number as the head. This can happen
 	 * because we allow multiple outstanding log writes concurrently,
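The unmount record check factored out above hinges on block arithmetic
modulo the physical log size: the log is clean iff the head sits
immediately after a single-op record whose payload is the unmount op. A
standalone sketch of that positional test (userspace C; BTOBB() converts
bytes to 512-byte basic blocks as in the kernel, while the function name
and sample field values are invented for illustration):

	#include <stdbool.h>
	#include <stdio.h>

	#define BBSHIFT		9	/* 512-byte basic blocks */
	#define BTOBB(b)	(((b) + (1 << BBSHIFT) - 1) >> BBSHIFT)

	/* Is head_blk exactly the block after the candidate unmount record? */
	static bool head_is_clean(long rhead_blk, int hblks, int h_len,
				  int num_logops, long head_blk, long logbbsize)
	{
		long	after = (rhead_blk + hblks + BTOBB(h_len)) % logbbsize;

		return head_blk == after && num_logops == 1;
	}

	int main(void)
	{
		/* record header at blk 100, 1 header block, 512-byte record */
		printf("%d\n", head_is_clean(100, 1, 512, 1, 102, 4096)); /* 1 */
		/* same position but more than one log op: not an unmount rec */
		printf("%d\n", head_is_clean(100, 1, 512, 2, 102, 4096)); /* 0 */
		return 0;
	}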