Diffstat (limited to 'fs')
-rw-r--r--	fs/xfs/xfs_log_recover.c | 271
1 file changed, 168 insertions(+), 103 deletions(-)
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 594f7e63b432..be5568839442 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1109,27 +1109,10 @@ xlog_verify_head(
 	bool			tmp_wrapped;
 
 	/*
-	 * Search backwards through the log looking for the log record header
-	 * block. This wraps all the way back around to the head so something is
-	 * seriously wrong if we can't find it.
-	 */
-	found = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, rhead_blk,
-			rhead, wrapped);
-	if (found < 0)
-		return found;
-	if (!found) {
-		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
-		return -EIO;
-	}
-
-	*tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
-
-	/*
-	 * Now that we have a tail block, check the head of the log for torn
-	 * writes. Search again until we hit the tail or the maximum number of
-	 * log record I/Os that could have been in flight at one time. Use a
-	 * temporary buffer so we don't trash the rhead/bp pointer from the
-	 * call above.
+	 * Check the head of the log for torn writes. Search backwards from the
+	 * head until we hit the tail or the maximum number of log record I/Os
+	 * that could have been in flight at one time. Use a temporary buffer so
+	 * we don't trash the rhead/bp pointers from the caller.
 	 */
 	tmp_bp = xlog_get_bp(log, 1);
 	if (!tmp_bp)
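
Editor's note: the rewritten comment above bounds the torn-write scan by the number of log record I/Os that can be in flight at once. As a standalone back-of-the-envelope sketch (assumed values: XLOG_MAX_ICLOGS is 8 in the kernel and 32k is a common iclog size; neither number comes from this patch), the window behind the head works out as:

/*
 * Rough size of the torn-write window: up to max_iclogs concurrent log
 * writes of iclog_size bytes each may be incomplete at the head.
 * BTOBB() converts bytes to 512-byte basic blocks, rounding up, like
 * the kernel macro of the same name.
 */
#include <stdio.h>

#define BBSHIFT		9
#define BTOBB(bytes)	(((bytes) + (1 << BBSHIFT) - 1) >> BBSHIFT)

int main(void)
{
	int max_iclogs = 8;		/* assumed: XLOG_MAX_ICLOGS */
	int iclog_size = 32 * 1024;	/* assumed iclog size in bytes */

	/* 8 * 64 = 512 basic blocks that may contain torn writes */
	printf("window = %d basic blocks\n", max_iclogs * BTOBB(iclog_size));
	return 0;
}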
@@ -1216,6 +1199,115 @@ xlog_verify_head(
 }
 
 /*
+ * Check whether the head of the log points to an unmount record. In other
+ * words, determine whether the log is clean. If so, update the in-core state
+ * appropriately.
+ */
+static int
+xlog_check_unmount_rec(
+	struct xlog		*log,
+	xfs_daddr_t		*head_blk,
+	xfs_daddr_t		*tail_blk,
+	struct xlog_rec_header	*rhead,
+	xfs_daddr_t		rhead_blk,
+	struct xfs_buf		*bp,
+	bool			*clean)
+{
+	struct xlog_op_header	*op_head;
+	xfs_daddr_t		umount_data_blk;
+	xfs_daddr_t		after_umount_blk;
+	int			hblks;
+	int			error;
+	char			*offset;
+
+	*clean = false;
+
+	/*
+	 * Look for unmount record. If we find it, then we know there was a
+	 * clean unmount. Since 'i' could be the last block in the physical
+	 * log, we convert to a log block before comparing to the head_blk.
+	 *
+	 * Save the current tail lsn to use to pass to xlog_clear_stale_blocks()
+	 * below. We won't want to clear the unmount record if there is one, so
+	 * we pass the lsn of the unmount record rather than the block after it.
+	 */
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+		int	h_size = be32_to_cpu(rhead->h_size);
+		int	h_version = be32_to_cpu(rhead->h_version);
+
+		if ((h_version & XLOG_VERSION_2) &&
+		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
+			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
+			if (h_size % XLOG_HEADER_CYCLE_SIZE)
+				hblks++;
+		} else {
+			hblks = 1;
+		}
+	} else {
+		hblks = 1;
+	}
+	after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
+	after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
+	if (*head_blk == after_umount_blk &&
+	    be32_to_cpu(rhead->h_num_logops) == 1) {
+		umount_data_blk = rhead_blk + hblks;
+		umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
+		error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+		if (error)
+			return error;
+
+		op_head = (struct xlog_op_header *)offset;
+		if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
+			/*
+			 * Set tail and last sync so that newly written log
+			 * records will point recovery to after the current
+			 * unmount record.
+			 */
+			xlog_assign_atomic_lsn(&log->l_tail_lsn,
+					log->l_curr_cycle, after_umount_blk);
+			xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
+					log->l_curr_cycle, after_umount_blk);
+			*tail_blk = after_umount_blk;
+
+			*clean = true;
+		}
+	}
+
+	return 0;
+}
+
+static void
+xlog_set_state(
+	struct xlog		*log,
+	xfs_daddr_t		head_blk,
+	struct xlog_rec_header	*rhead,
+	xfs_daddr_t		rhead_blk,
+	bool			bump_cycle)
+{
+	/*
+	 * Reset log values according to the state of the log when we
+	 * crashed. In the case where head_blk == 0, we bump curr_cycle
+	 * one because the next write starts a new cycle rather than
+	 * continuing the cycle of the last good log record. At this
+	 * point we have guaranteed that all partial log records have been
+	 * accounted for. Therefore, we know that the last good log record
+	 * written was complete and ended exactly on the end boundary
+	 * of the physical log.
+	 */
+	log->l_prev_block = rhead_blk;
+	log->l_curr_block = (int)head_blk;
+	log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
+	if (bump_cycle)
+		log->l_curr_cycle++;
+	atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
+	atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
+	xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
+					BBTOB(log->l_curr_block));
+	xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
+					BBTOB(log->l_curr_block));
+}
+
+/*
  * Find the sync block number or the tail of the log.
  *
  * This will be the block number of the last record to have its
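
Editor's note: the hblks logic in the new xlog_check_unmount_rec() rounds the v2 record header size up to whole XLOG_HEADER_CYCLE_SIZE chunks. A minimal standalone sketch of just that rounding, assuming the kernel's 32k value for XLOG_HEADER_CYCLE_SIZE:

#include <stdio.h>

#define XLOG_HEADER_CYCLE_SIZE	(32 * 1024)	/* assumed, matches the kernel */

/* mirror of the hblks computation for a v2 log record header */
static int log_header_blocks(int h_size)
{
	int hblks;

	if (h_size <= XLOG_HEADER_CYCLE_SIZE)
		return 1;
	hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
	if (h_size % XLOG_HEADER_CYCLE_SIZE)
		hblks++;	/* a partial chunk still needs a header block */
	return hblks;
}

int main(void)
{
	printf("%d\n", log_header_blocks(32768));	/* 1 */
	printf("%d\n", log_header_blocks(65536));	/* 2 */
	printf("%d\n", log_header_blocks(262144));	/* 8 */
	return 0;
}

The block after the unmount record then falls at rhead_blk + hblks + BTOBB(h_len), reduced modulo l_logBBsize so a record near the physical end of the log wraps correctly.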
@@ -1238,22 +1330,20 @@ xlog_find_tail(
 	xfs_daddr_t		*tail_blk)
 {
 	xlog_rec_header_t	*rhead;
-	xlog_op_header_t	*op_head;
 	char			*offset = NULL;
 	xfs_buf_t		*bp;
 	int			error;
-	xfs_daddr_t		umount_data_blk;
-	xfs_daddr_t		after_umount_blk;
 	xfs_daddr_t		rhead_blk;
 	xfs_lsn_t		tail_lsn;
-	int			hblks;
 	bool			wrapped = false;
+	bool			clean = false;
 
 	/*
 	 * Find previous log record
 	 */
 	if ((error = xlog_find_head(log, head_blk)))
 		return error;
+	ASSERT(*head_blk < INT_MAX);
 
 	bp = xlog_get_bp(log, 1);
 	if (!bp)
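
Editor's note: the new ASSERT(*head_blk < INT_MAX) is worth a word, since xlog_set_state() stores the 64-bit head block in the int-sized l_curr_block; the assert documents that the narrowing cast cannot truncate. A standalone illustration with assumed types (the printed value is the usual two's-complement result, which is implementation-defined in C):

#include <limits.h>
#include <stdint.h>
#include <stdio.h>

typedef int64_t xfs_daddr_t;	/* assumed to match the kernel typedef */

int main(void)
{
	xfs_daddr_t head_blk = (xfs_daddr_t)INT_MAX + 1;
	int l_curr_block = (int)head_blk;	/* truncates without the ASSERT */

	printf("%d\n", l_curr_block);		/* typically -2147483648 */
	return 0;
}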
@@ -1271,100 +1361,75 @@ xlog_find_tail(
 	}
 
 	/*
-	 * Trim the head block back to skip over torn records. We can have
-	 * multiple log I/Os in flight at any time, so we assume CRC failures
-	 * back through the previous several records are torn writes and skip
-	 * them.
+	 * Search backwards through the log looking for the log record header
+	 * block. This wraps all the way back around to the head so something is
+	 * seriously wrong if we can't find it.
 	 */
-	ASSERT(*head_blk < INT_MAX);
-	error = xlog_verify_head(log, head_blk, tail_blk, bp, &rhead_blk,
-				 &rhead, &wrapped);
-	if (error)
-		goto done;
+	error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp,
+				      &rhead_blk, &rhead, &wrapped);
+	if (error < 0)
+		return error;
+	if (!error) {
+		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
+		return -EIO;
+	}
+	*tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
 
 	/*
-	 * Reset log values according to the state of the log when we
-	 * crashed. In the case where head_blk == 0, we bump curr_cycle
-	 * one because the next write starts a new cycle rather than
-	 * continuing the cycle of the last good log record. At this
-	 * point we have guaranteed that all partial log records have been
-	 * accounted for. Therefore, we know that the last good log record
-	 * written was complete and ended exactly on the end boundary
-	 * of the physical log.
+	 * Set the log state based on the current head record.
 	 */
-	log->l_prev_block = rhead_blk;
-	log->l_curr_block = (int)*head_blk;
-	log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
-	if (wrapped)
-		log->l_curr_cycle++;
-	atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
-	atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
-	xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
-					BBTOB(log->l_curr_block));
-	xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
-					BBTOB(log->l_curr_block));
+	xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped);
+	tail_lsn = atomic64_read(&log->l_tail_lsn);
 
 	/*
-	 * Look for unmount record. If we find it, then we know there
-	 * was a clean unmount. Since 'i' could be the last block in
-	 * the physical log, we convert to a log block before comparing
-	 * to the head_blk.
+	 * Look for an unmount record at the head of the log. This sets the log
+	 * state to determine whether recovery is necessary.
+	 */
+	error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead,
+				       rhead_blk, bp, &clean);
+	if (error)
+		goto done;
+
+	/*
+	 * Verify the log head if the log is not clean (e.g., we have anything
+	 * but an unmount record at the head). This uses CRC verification to
+	 * detect and trim torn writes. If discovered, CRC failures are
+	 * considered torn writes and the log head is trimmed accordingly.
 	 *
-	 * Save the current tail lsn to use to pass to
-	 * xlog_clear_stale_blocks() below. We won't want to clear the
-	 * unmount record if there is one, so we pass the lsn of the
-	 * unmount record rather than the block after it.
+	 * Note that we can only run CRC verification when the log is dirty
+	 * because there's no guarantee that the log data behind an unmount
+	 * record is compatible with the current architecture.
 	 */
-	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
-		int	h_size = be32_to_cpu(rhead->h_size);
-		int	h_version = be32_to_cpu(rhead->h_version);
+	if (!clean) {
+		xfs_daddr_t	orig_head = *head_blk;
 
-		if ((h_version & XLOG_VERSION_2) &&
-		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
-			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
-			if (h_size % XLOG_HEADER_CYCLE_SIZE)
-				hblks++;
-		} else {
-			hblks = 1;
-		}
-	} else {
-		hblks = 1;
-	}
-	after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
-	after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
-	tail_lsn = atomic64_read(&log->l_tail_lsn);
-	if (*head_blk == after_umount_blk &&
-	    be32_to_cpu(rhead->h_num_logops) == 1) {
-		umount_data_blk = rhead_blk + hblks;
-		umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
-		error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+		error = xlog_verify_head(log, head_blk, tail_blk, bp,
+					 &rhead_blk, &rhead, &wrapped);
 		if (error)
 			goto done;
 
-		op_head = (xlog_op_header_t *)offset;
-		if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
-			/*
-			 * Set tail and last sync so that newly written
-			 * log records will point recovery to after the
-			 * current unmount record.
-			 */
-			xlog_assign_atomic_lsn(&log->l_tail_lsn,
-					log->l_curr_cycle, after_umount_blk);
-			xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
-					log->l_curr_cycle, after_umount_blk);
-			*tail_blk = after_umount_blk;
-
-			/*
-			 * Note that the unmount was clean. If the unmount
-			 * was not clean, we need to know this to rebuild the
-			 * superblock counters from the perag headers if we
-			 * have a filesystem using non-persistent counters.
-			 */
-			log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
+		/* update in-core state again if the head changed */
+		if (*head_blk != orig_head) {
+			xlog_set_state(log, *head_blk, rhead, rhead_blk,
+				       wrapped);
+			tail_lsn = atomic64_read(&log->l_tail_lsn);
+			error = xlog_check_unmount_rec(log, head_blk, tail_blk,
+						       rhead, rhead_blk, bp,
+						       &clean);
+			if (error)
+				goto done;
 		}
 	}
 
 	/*
+	 * Note that the unmount was clean. If the unmount was not clean, we
+	 * need to know this to rebuild the superblock counters from the perag
+	 * headers if we have a filesystem using non-persistent counters.
+	 */
+	if (clean)
+		log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
+
+	/*
 	 * Make sure that there are no blocks in front of the head
 	 * with the same cycle number as the head. This can happen
 	 * because we allow multiple outstanding log writes concurrently,
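
Editor's note: several of the moved lines above manipulate LSNs through BLOCK_LSN(), xlog_assign_atomic_lsn() and atomic64_set(). An XFS LSN packs the cycle number into the high 32 bits and the basic block number into the low 32 bits; a minimal standalone sketch of that packing (helper names here are illustrative, not the kernel's):

#include <stdint.h>
#include <stdio.h>

static uint64_t lsn_pack(uint32_t cycle, uint32_t block)
{
	return ((uint64_t)cycle << 32) | block;
}

static uint32_t lsn_cycle(uint64_t lsn) { return (uint32_t)(lsn >> 32); }
static uint32_t lsn_block(uint64_t lsn) { return (uint32_t)lsn; }

int main(void)
{
	/* e.g. the tail LSN set after finding an unmount record */
	uint64_t lsn = lsn_pack(7, 4096);

	printf("cycle=%u block=%u\n", lsn_cycle(lsn), lsn_block(lsn));
	return 0;
}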