jbd2: issue cache flush after checkpointing even with internal journal

When we reach jbd2_cleanup_journal_tail(), there is no guarantee that checkpointed buffers are on stable storage - especially if the buffers were written out by jbd2_log_do_checkpoint(), they are likely to be only in the disk's caches. Thus when we update the journal superblock, effectively removing the old transaction from the journal, this write of the superblock can get to stable storage before those checkpointed buffers, which can result in filesystem corruption after a crash. Thus we must unconditionally issue a cache flush before we update the journal superblock in these cases. A similar problem can also occur if the journal superblock is written only to the disk's caches, another transaction starts reusing the space of the transaction cleaned from the log, and a power failure happens. Subsequent journal replay would still try to replay the old transaction, but some of its blocks may already be overwritten by the new transaction. For this reason we must use WRITE_FUA when updating the log tail, and we must first write the new log tail to disk and update the in-memory information only after that. Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
author: Jan Kara <jack@suse.cz> 2012-03-13 22:22:54 -0400
committer: Theodore Ts'o <tytso@mit.edu> 2012-03-13 22:22:54 -0400
commit: 79feb521a44705262d15cc819a4117a447b11ea7 (patch)
tree: a4de6ed084b7a68c0885049d94841ce8334b64a7 /fs
parent: a78bb11d7acd525623c6a0c2ff4e213d527573fa (diff)
4 files changed, 143 insertions, 86 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 19dcd0b86bca..7f7ee5b90402 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -478,79 +478,28 @@ out:
 int jbd2_cleanup_journal_tail(journal_t *journal)
 {
-        transaction_t * transaction;
        tid_t           first_tid;
-        unsigned long   blocknr, freed;
+        unsigned long   blocknr;
        if (is_journal_aborted(journal))
                return 1;
-        /* OK, work out the oldest transaction remaining in the log, and
+        if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
-         * the log block it starts at.
-         *
-         * If the log is now empty, we need to work out which is the
-         * next transaction ID we will write, and where it will
-         * start. */
-        write_lock(&journal->j_state_lock);
-        spin_lock(&journal->j_list_lock);
-        transaction = journal->j_checkpoint_transactions;
-        if (transaction) {
-                first_tid = transaction->t_tid;
-                blocknr = transaction->t_log_start;
-        } else if ((transaction = journal->j_committing_transaction) != NULL) {
-                first_tid = transaction->t_tid;
-                blocknr = transaction->t_log_start;
-        } else if ((transaction = journal->j_running_transaction) != NULL) {
-                first_tid = transaction->t_tid;
-                blocknr = journal->j_head;
-        } else {
-                first_tid = journal->j_transaction_sequence;
-                blocknr = journal->j_head;
-        }
-        spin_unlock(&journal->j_list_lock);
-        J_ASSERT(blocknr != 0);
-        /* If the oldest pinned transaction is at the tail of the log
-           already then there's not much we can do right now. */
-        if (journal->j_tail_sequence == first_tid) {
-                write_unlock(&journal->j_state_lock);
                return 1;
-        }
+        J_ASSERT(blocknr != 0);
-        /* OK, update the superblock to recover the freed space.
-         * Physical blocks come first: have we wrapped beyond the end of
-         * the log?  */
-        freed = blocknr - journal->j_tail;
-        if (blocknr < journal->j_tail)
-                freed = freed + journal->j_last - journal->j_first;
-        trace_jbd2_cleanup_journal_tail(journal, first_tid, blocknr, freed);
-        jbd_debug(1,
-                  "Cleaning journal tail from %d to %d (offset %lu), "
-                  "freeing %lu\n",
-                  journal->j_tail_sequence, first_tid, blocknr, freed);
-        journal->j_free += freed;
-        journal->j_tail_sequence = first_tid;
-        journal->j_tail = blocknr;
-        write_unlock(&journal->j_state_lock);
        /*
-         * If there is an external journal, we need to make sure that
+         * We need to make sure that any blocks that were recently written out
-         * any data blocks that were recently written out --- perhaps
+         * --- perhaps by jbd2_log_do_checkpoint() --- are flushed out before
-         * by jbd2_log_do_checkpoint() --- are flushed out before we
+         * we drop the transactions from the journal. It's unlikely this will
-         * drop the transactions from the external journal.  It's
+         * be necessary, especially with an appropriately sized journal, but we
-         * unlikely this will be necessary, especially with a
+         * need this to guarantee correctness.  Fortunately
-         * appropriately sized journal, but we need this to guarantee
+         * jbd2_cleanup_journal_tail() doesn't get called all that often.
-         * correctness.  Fortunately jbd2_cleanup_journal_tail()
-         * doesn't get called all that often.
         */
-        if ((journal->j_fs_dev != journal->j_dev) &&
+        if (journal->j_flags & JBD2_BARRIER)
-            (journal->j_flags & JBD2_BARRIER))
                blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
-        if (!(journal->j_flags & JBD2_ABORT))
-                jbd2_journal_update_sb_log_tail(journal);
+        __jbd2_update_log_tail(journal, first_tid, blocknr);
        return 0;
 }
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6705717d9b7f..b89ef84786a7 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -341,7 +341,16 @@ void jbd2_journal_commit_transaction(journal_t *journal)
        if (journal->j_flags & JBD2_FLUSHED) {
                jbd_debug(3, "super block updated\n");
                mutex_lock(&journal->j_checkpoint_mutex);
-                jbd2_journal_update_sb_log_tail(journal);
+                /*
+                 * We hold j_checkpoint_mutex so tail cannot change under us.
+                 * We don't need any special data guarantees for writing sb
+                 * since journal is empty and it is ok for write to be
+                 * flushed only with transaction commit.
+                 */
+                jbd2_journal_update_sb_log_tail(journal,
+                                                journal->j_tail_sequence,
+                                                journal->j_tail,
+                                                WRITE_SYNC);
                mutex_unlock(&journal->j_checkpoint_mutex);
        } else {
                jbd_debug(3, "superblock not updated\n");
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index fc5f2acc9f18..c5ff177400ff 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -742,6 +742,85 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
        return jbd2_journal_add_journal_head(bh);
 }
+/*
+ * Return tid of the oldest transaction in the journal and block in the journal
+ * where the transaction starts.
+ *
+ * If the journal is now empty, return which will be the next transaction ID
+ * we will write and where will that transaction start.
+ *
+ * The return value is 0 if journal tail cannot be pushed any further, 1 if
+ * it can.
+ */
+int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
+                              unsigned long *block)
+{
+        transaction_t *transaction;
+        int ret;
+        read_lock(&journal->j_state_lock);
+        spin_lock(&journal->j_list_lock);
+        transaction = journal->j_checkpoint_transactions;
+        if (transaction) {
+                *tid = transaction->t_tid;
+                *block = transaction->t_log_start;
+        } else if ((transaction = journal->j_committing_transaction) != NULL) {
+                *tid = transaction->t_tid;
+                *block = transaction->t_log_start;
+        } else if ((transaction = journal->j_running_transaction) != NULL) {
+                *tid = transaction->t_tid;
+                *block = journal->j_head;
+        } else {
+                *tid = journal->j_transaction_sequence;
+                *block = journal->j_head;
+        }
+        ret = tid_gt(*tid, journal->j_tail_sequence);
+        spin_unlock(&journal->j_list_lock);
+        read_unlock(&journal->j_state_lock);
+        return ret;
+}
+/*
+ * Update information in journal structure and in on disk journal superblock
+ * about log tail. This function does not check whether information passed in
+ * really pushes log tail further. It's responsibility of the caller to make
+ * sure provided log tail information is valid (e.g. by holding
+ * j_checkpoint_mutex all the time between computing log tail and calling this
+ * function as is the case with jbd2_cleanup_journal_tail()).
+ *
+ * Requires j_checkpoint_mutex
+ */
+void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
+{
+        unsigned long freed;
+        BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
+        /*
+         * We cannot afford for write to remain in drive's caches since as
+         * soon as we update j_tail, next transaction can start reusing journal
+         * space and if we lose sb update during power failure we'd replay
+         * old transaction with possibly newly overwritten data.
+         */
+        jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
+        write_lock(&journal->j_state_lock);
+        freed = block - journal->j_tail;
+        if (block < journal->j_tail)
+                freed += journal->j_last - journal->j_first;
+        trace_jbd2_update_log_tail(journal, tid, block, freed);
+        jbd_debug(1,
+                  "Cleaning journal tail from %d to %d (offset %lu), "
+                  "freeing %lu\n",
+                  journal->j_tail_sequence, tid, block, freed);
+        journal->j_free += freed;
+        journal->j_tail_sequence = tid;
+        journal->j_tail = block;
+        write_unlock(&journal->j_state_lock);
+}
 struct jbd2_stats_proc_session {
        journal_t *journal;
        struct transaction_stats_s *stats;
@@ -1125,18 +1204,30 @@ static int journal_reset(journal_t *journal)
        } else {
                /* Lock here to make assertions happy... */
                mutex_lock(&journal->j_checkpoint_mutex);
-                /* Add the dynamic fields and write it to disk. */
+                /*
-                jbd2_journal_update_sb_log_tail(journal);
+                 * Update log tail information. We use WRITE_FUA since new
+                 * transaction will start reusing journal space and so we
+                 * must make sure information about current log tail is on
+                 * disk before that.
+                 */
+                jbd2_journal_update_sb_log_tail(journal,
+                                                journal->j_tail_sequence,
+                                                journal->j_tail,
+                                                WRITE_FUA);
                mutex_unlock(&journal->j_checkpoint_mutex);
        }
        return jbd2_journal_start_thread(journal);
 }
-static void jbd2_write_superblock(journal_t *journal)
+static void jbd2_write_superblock(journal_t *journal, int write_op)
 {
        struct buffer_head *bh = journal->j_sb_buffer;
+        int ret;
-        trace_jbd2_write_superblock(journal);
+        trace_jbd2_write_superblock(journal, write_op);
+        if (!(journal->j_flags & JBD2_BARRIER))
+                write_op &= ~(REQ_FUA | REQ_FLUSH);
+        lock_buffer(bh);
        if (buffer_write_io_error(bh)) {
                /*
                 * Oh, dear.  A previous attempt to write the journal
@@ -1152,40 +1243,45 @@ static void jbd2_write_superblock(journal_t *journal)
                clear_buffer_write_io_error(bh);
                set_buffer_uptodate(bh);
        }
+        get_bh(bh);
-        BUFFER_TRACE(bh, "marking dirty");
+        bh->b_end_io = end_buffer_write_sync;
-        mark_buffer_dirty(bh);
+        ret = submit_bh(write_op, bh);
-        sync_dirty_buffer(bh);
+        wait_on_buffer(bh);
        if (buffer_write_io_error(bh)) {
-                printk(KERN_ERR "JBD2: I/O error detected "
-                       "when updating journal superblock for %s.\n",
-                       journal->j_devname);
                clear_buffer_write_io_error(bh);
                set_buffer_uptodate(bh);
+                ret = -EIO;
+        }
+        if (ret) {
+                printk(KERN_ERR "JBD2: Error %d detected when updating "
+                       "journal superblock for %s.\n", ret,
+                       journal->j_devname);
        }
 }
 /**
 * jbd2_journal_update_sb_log_tail() - Update log tail in journal sb on disk.
 * @journal: The journal to update.
+ * @tail_tid: TID of the new transaction at the tail of the log
+ * @tail_block: The first block of the transaction at the tail of the log
+ * @write_op: With which operation should we write the journal sb
 *
 * Update a journal's superblock information about log tail and write it to
 * disk, waiting for the IO to complete.
 */
-void jbd2_journal_update_sb_log_tail(journal_t *journal)
+void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
+                                     unsigned long tail_block, int write_op)
 {
        journal_superblock_t *sb = journal->j_superblock;
        BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
-        read_lock(&journal->j_state_lock);
+        jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
-        jbd_debug(1, "JBD2: updating superblock (start %ld, seq %d)\n",
+                  tail_block, tail_tid);
-                  journal->j_tail, journal->j_tail_sequence);
-        sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
+        sb->s_sequence = cpu_to_be32(tail_tid);
-        sb->s_start    = cpu_to_be32(journal->j_tail);
+        sb->s_start    = cpu_to_be32(tail_block);
-        read_unlock(&journal->j_state_lock);
-        jbd2_write_superblock(journal);
+        jbd2_write_superblock(journal, write_op);
        /* Log is no longer empty */
        write_lock(&journal->j_state_lock);
@@ -1214,7 +1310,7 @@ static void jbd2_mark_journal_empty(journal_t *journal)
        sb->s_start    = cpu_to_be32(0);
        read_unlock(&journal->j_state_lock);
-        jbd2_write_superblock(journal);
+        jbd2_write_superblock(journal, WRITE_FUA);
        /* Log is no longer empty */
        write_lock(&journal->j_state_lock);
@@ -1240,7 +1336,7 @@ static void jbd2_journal_update_sb_errno(journal_t *journal)
        sb->s_errno    = cpu_to_be32(journal->j_errno);
        read_unlock(&journal->j_state_lock);
-        jbd2_write_superblock(journal);
+        jbd2_write_superblock(journal, WRITE_SYNC);
 }
 /*
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index da6d7baf1390..c1a03354a22f 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -21,6 +21,7 @@
 #include <linux/jbd2.h>
 #include <linux/errno.h>
 #include <linux/crc32.h>
+#include <linux/blkdev.h>
 #endif
 /*
@@ -265,7 +266,9 @@ int jbd2_journal_recover(journal_t *journal)
        err2 = sync_blockdev(journal->j_fs_dev);
        if (!err)
                err = err2;
+        /* Make sure all replayed data is on permanent storage */
+        if (journal->j_flags & JBD2_BARRIER)
+                blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
        return err;
 }
author	Jan Kara <jack@suse.cz>	2012-03-13 22:22:54 -0400
committer	Theodore Ts'o <tytso@mit.edu>	2012-03-13 22:22:54 -0400
commit	79feb521a44705262d15cc819a4117a447b11ea7 (patch)
tree	a4de6ed084b7a68c0885049d94841ce8334b64a7 /fs
parent	a78bb11d7acd525623c6a0c2ff4e213d527573fa (diff)

diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 19dcd0b86bca..7f7ee5b90402 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c
@@ -478,79 +478,28 @@ out:
478		478
479	int jbd2_cleanup_journal_tail(journal_t *journal)	479	int jbd2_cleanup_journal_tail(journal_t *journal)
480	{	480	{
481	transaction_t * transaction;
482	tid_t first_tid;	481	tid_t first_tid;
483	unsigned long blocknr, freed;	482	unsigned long blocknr;
484		483
485	if (is_journal_aborted(journal))	484	if (is_journal_aborted(journal))
486	return 1;	485	return 1;
487		486
488	/* OK, work out the oldest transaction remaining in the log, and	487	if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
489	* the log block it starts at.
490	*
491	* If the log is now empty, we need to work out which is the
492	* next transaction ID we will write, and where it will
493	* start. */
494
495	write_lock(&journal->j_state_lock);
496	spin_lock(&journal->j_list_lock);
497	transaction = journal->j_checkpoint_transactions;
498	if (transaction) {
499	first_tid = transaction->t_tid;
500	blocknr = transaction->t_log_start;
501	} else if ((transaction = journal->j_committing_transaction) != NULL) {
502	first_tid = transaction->t_tid;
503	blocknr = transaction->t_log_start;
504	} else if ((transaction = journal->j_running_transaction) != NULL) {
505	first_tid = transaction->t_tid;
506	blocknr = journal->j_head;
507	} else {
508	first_tid = journal->j_transaction_sequence;
509	blocknr = journal->j_head;
510	}
511	spin_unlock(&journal->j_list_lock);
512	J_ASSERT(blocknr != 0);
513
514	/* If the oldest pinned transaction is at the tail of the log
515	already then there's not much we can do right now. */
516	if (journal->j_tail_sequence == first_tid) {
517	write_unlock(&journal->j_state_lock);
518	return 1;	488	return 1;
519	}	489	J_ASSERT(blocknr != 0);
520
521	/* OK, update the superblock to recover the freed space.
522	* Physical blocks come first: have we wrapped beyond the end of
523	* the log? */
524	freed = blocknr - journal->j_tail;
525	if (blocknr < journal->j_tail)
526	freed = freed + journal->j_last - journal->j_first;
527
528	trace_jbd2_cleanup_journal_tail(journal, first_tid, blocknr, freed);
529	jbd_debug(1,
530	"Cleaning journal tail from %d to %d (offset %lu), "
531	"freeing %lu\n",
532	journal->j_tail_sequence, first_tid, blocknr, freed);
533
534	journal->j_free += freed;
535	journal->j_tail_sequence = first_tid;
536	journal->j_tail = blocknr;
537	write_unlock(&journal->j_state_lock);
538		490
539	/*	491	/*
540	* If there is an external journal, we need to make sure that	492	* We need to make sure that any blocks that were recently written out
541	* any data blocks that were recently written out --- perhaps	493	* --- perhaps by jbd2_log_do_checkpoint() --- are flushed out before
542	* by jbd2_log_do_checkpoint() --- are flushed out before we	494	* we drop the transactions from the journal. It's unlikely this will
543	* drop the transactions from the external journal. It's	495	* be necessary, especially with an appropriately sized journal, but we
544	* unlikely this will be necessary, especially with a	496	* need this to guarantee correctness. Fortunately
545	* appropriately sized journal, but we need this to guarantee	497	* jbd2_cleanup_journal_tail() doesn't get called all that often.
546	* correctness. Fortunately jbd2_cleanup_journal_tail()
547	* doesn't get called all that often.
548	*/	498	*/
549	if ((journal->j_fs_dev != journal->j_dev) &&	499	if (journal->j_flags & JBD2_BARRIER)
550	(journal->j_flags & JBD2_BARRIER))
551	blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);	500	blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
552	if (!(journal->j_flags & JBD2_ABORT))	501
553	jbd2_journal_update_sb_log_tail(journal);	502	__jbd2_update_log_tail(journal, first_tid, blocknr);
554	return 0;	503	return 0;
555	}	504	}
556		505


diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6705717d9b7f..b89ef84786a7 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c
@@ -341,7 +341,16 @@ void jbd2_journal_commit_transaction(journal_t *journal)
341	if (journal->j_flags & JBD2_FLUSHED) {	341	if (journal->j_flags & JBD2_FLUSHED) {
342	jbd_debug(3, "super block updated\n");	342	jbd_debug(3, "super block updated\n");
343	mutex_lock(&journal->j_checkpoint_mutex);	343	mutex_lock(&journal->j_checkpoint_mutex);
344	jbd2_journal_update_sb_log_tail(journal);	344	/*
		345	* We hold j_checkpoint_mutex so tail cannot change under us.
		346	* We don't need any special data guarantees for writing sb
		347	* since journal is empty and it is ok for write to be
		348	* flushed only with transaction commit.
		349	*/
		350	jbd2_journal_update_sb_log_tail(journal,
		351	journal->j_tail_sequence,
		352	journal->j_tail,
		353	WRITE_SYNC);
345	mutex_unlock(&journal->j_checkpoint_mutex);	354	mutex_unlock(&journal->j_checkpoint_mutex);
346	} else {	355	} else {
347	jbd_debug(3, "superblock not updated\n");	356	jbd_debug(3, "superblock not updated\n");


diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index fc5f2acc9f18..c5ff177400ff 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c
@@ -742,6 +742,85 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
742	return jbd2_journal_add_journal_head(bh);	742	return jbd2_journal_add_journal_head(bh);
743	}	743	}
744		744
		745	/*
		746	* Return tid of the oldest transaction in the journal and block in the journal
		747	* where the transaction starts.
		748	*
		749	* If the journal is now empty, return which will be the next transaction ID
		750	* we will write and where will that transaction start.
		751	*
		752	* The return value is 0 if journal tail cannot be pushed any further, 1 if
		753	* it can.
		754	*/
		755	int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
		756	unsigned long *block)
		757	{
		758	transaction_t *transaction;
		759	int ret;
		760
		761	read_lock(&journal->j_state_lock);
		762	spin_lock(&journal->j_list_lock);
		763	transaction = journal->j_checkpoint_transactions;
		764	if (transaction) {
		765	*tid = transaction->t_tid;
		766	*block = transaction->t_log_start;
		767	} else if ((transaction = journal->j_committing_transaction) != NULL) {
		768	*tid = transaction->t_tid;
		769	*block = transaction->t_log_start;
		770	} else if ((transaction = journal->j_running_transaction) != NULL) {
		771	*tid = transaction->t_tid;
		772	*block = journal->j_head;
		773	} else {
		774	*tid = journal->j_transaction_sequence;
		775	*block = journal->j_head;
		776	}
		777	ret = tid_gt(*tid, journal->j_tail_sequence);
		778	spin_unlock(&journal->j_list_lock);
		779	read_unlock(&journal->j_state_lock);
		780
		781	return ret;
		782	}
		783
		784	/*
		785	* Update information in journal structure and in on disk journal superblock
		786	* about log tail. This function does not check whether information passed in
		787	* really pushes log tail further. It's responsibility of the caller to make
		788	* sure provided log tail information is valid (e.g. by holding
		789	* j_checkpoint_mutex all the time between computing log tail and calling this
		790	* function as is the case with jbd2_cleanup_journal_tail()).
		791	*
		792	* Requires j_checkpoint_mutex
		793	*/
		794	void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
		795	{
		796	unsigned long freed;
		797
		798	BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
		799
		800	/*
		801	* We cannot afford for write to remain in drive's caches since as
		802	* soon as we update j_tail, next transaction can start reusing journal
		803	* space and if we lose sb update during power failure we'd replay
		804	* old transaction with possibly newly overwritten data.
		805	*/
		806	jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
		807	write_lock(&journal->j_state_lock);
		808	freed = block - journal->j_tail;
		809	if (block < journal->j_tail)
		810	freed += journal->j_last - journal->j_first;
		811
		812	trace_jbd2_update_log_tail(journal, tid, block, freed);
		813	jbd_debug(1,
		814	"Cleaning journal tail from %d to %d (offset %lu), "
		815	"freeing %lu\n",
		816	journal->j_tail_sequence, tid, block, freed);
		817
		818	journal->j_free += freed;
		819	journal->j_tail_sequence = tid;
		820	journal->j_tail = block;
		821	write_unlock(&journal->j_state_lock);
		822	}
		823
745	struct jbd2_stats_proc_session {	824	struct jbd2_stats_proc_session {
746	journal_t *journal;	825	journal_t *journal;
747	struct transaction_stats_s *stats;	826	struct transaction_stats_s *stats;
@@ -1125,18 +1204,30 @@ static int journal_reset(journal_t *journal)
1125	} else {	1204	} else {
1126	/* Lock here to make assertions happy... */	1205	/* Lock here to make assertions happy... */
1127	mutex_lock(&journal->j_checkpoint_mutex);	1206	mutex_lock(&journal->j_checkpoint_mutex);
1128	/* Add the dynamic fields and write it to disk. */	1207	/*
1129	jbd2_journal_update_sb_log_tail(journal);	1208	* Update log tail information. We use WRITE_FUA since new
		1209	* transaction will start reusing journal space and so we
		1210	* must make sure information about current log tail is on
		1211	* disk before that.
		1212	*/
		1213	jbd2_journal_update_sb_log_tail(journal,
		1214	journal->j_tail_sequence,
		1215	journal->j_tail,
		1216	WRITE_FUA);
1130	mutex_unlock(&journal->j_checkpoint_mutex);	1217	mutex_unlock(&journal->j_checkpoint_mutex);
1131	}	1218	}
1132	return jbd2_journal_start_thread(journal);	1219	return jbd2_journal_start_thread(journal);
1133	}	1220	}
1134		1221
1135	static void jbd2_write_superblock(journal_t *journal)	1222	static void jbd2_write_superblock(journal_t *journal, int write_op)
1136	{	1223	{
1137	struct buffer_head *bh = journal->j_sb_buffer;	1224	struct buffer_head *bh = journal->j_sb_buffer;
		1225	int ret;
1138		1226
1139	trace_jbd2_write_superblock(journal);	1227	trace_jbd2_write_superblock(journal, write_op);
		1228	if (!(journal->j_flags & JBD2_BARRIER))
		1229	write_op &= ~(REQ_FUA | REQ_FLUSH);
		1230	lock_buffer(bh);
1140	if (buffer_write_io_error(bh)) {	1231	if (buffer_write_io_error(bh)) {
1141	/*	1232	/*
1142	* Oh, dear. A previous attempt to write the journal	1233	* Oh, dear. A previous attempt to write the journal
@@ -1152,40 +1243,45 @@ static void jbd2_write_superblock(journal_t *journal)
1152	clear_buffer_write_io_error(bh);	1243	clear_buffer_write_io_error(bh);
1153	set_buffer_uptodate(bh);	1244	set_buffer_uptodate(bh);
1154	}	1245	}
1155		1246	get_bh(bh);
1156	BUFFER_TRACE(bh, "marking dirty");	1247	bh->b_end_io = end_buffer_write_sync;
1157	mark_buffer_dirty(bh);	1248	ret = submit_bh(write_op, bh);
1158	sync_dirty_buffer(bh);	1249	wait_on_buffer(bh);
1159	if (buffer_write_io_error(bh)) {	1250	if (buffer_write_io_error(bh)) {
1160	printk(KERN_ERR "JBD2: I/O error detected "
1161	"when updating journal superblock for %s.\n",
1162	journal->j_devname);
1163	clear_buffer_write_io_error(bh);	1251	clear_buffer_write_io_error(bh);
1164	set_buffer_uptodate(bh);	1252	set_buffer_uptodate(bh);
		1253	ret = -EIO;
		1254	}
		1255	if (ret) {
		1256	printk(KERN_ERR "JBD2: Error %d detected when updating "
		1257	"journal superblock for %s.\n", ret,
		1258	journal->j_devname);
1165	}	1259	}
1166	}	1260	}
1167		1261
1168	/**	1262	/**
1169	* jbd2_journal_update_sb_log_tail() - Update log tail in journal sb on disk.	1263	* jbd2_journal_update_sb_log_tail() - Update log tail in journal sb on disk.
1170	* @journal: The journal to update.	1264	* @journal: The journal to update.
		1265	* @tail_tid: TID of the new transaction at the tail of the log
		1266	* @tail_block: The first block of the transaction at the tail of the log
		1267	* @write_op: With which operation should we write the journal sb
1171	*	1268	*
1172	* Update a journal's superblock information about log tail and write it to	1269	* Update a journal's superblock information about log tail and write it to
1173	* disk, waiting for the IO to complete.	1270	* disk, waiting for the IO to complete.
1174	*/	1271	*/
1175	void jbd2_journal_update_sb_log_tail(journal_t *journal)	1272	void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
		1273	unsigned long tail_block, int write_op)
1176	{	1274	{
1177	journal_superblock_t *sb = journal->j_superblock;	1275	journal_superblock_t *sb = journal->j_superblock;
1178		1276
1179	BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));	1277	BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1180	read_lock(&journal->j_state_lock);	1278	jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
1181	jbd_debug(1, "JBD2: updating superblock (start %ld, seq %d)\n",	1279	tail_block, tail_tid);
1182	journal->j_tail, journal->j_tail_sequence);
1183		1280
1184	sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);	1281	sb->s_sequence = cpu_to_be32(tail_tid);
1185	sb->s_start = cpu_to_be32(journal->j_tail);	1282	sb->s_start = cpu_to_be32(tail_block);
1186	read_unlock(&journal->j_state_lock);
1187		1283
1188	jbd2_write_superblock(journal);	1284	jbd2_write_superblock(journal, write_op);
1189		1285
1190	/* Log is no longer empty */	1286	/* Log is no longer empty */
1191	write_lock(&journal->j_state_lock);	1287	write_lock(&journal->j_state_lock);
@@ -1214,7 +1310,7 @@ static void jbd2_mark_journal_empty(journal_t *journal)
1214	sb->s_start = cpu_to_be32(0);	1310	sb->s_start = cpu_to_be32(0);
1215	read_unlock(&journal->j_state_lock);	1311	read_unlock(&journal->j_state_lock);
1216		1312
1217	jbd2_write_superblock(journal);	1313	jbd2_write_superblock(journal, WRITE_FUA);
1218		1314
1219	/* Log is no longer empty */	1315	/* Log is no longer empty */
1220	write_lock(&journal->j_state_lock);	1316	write_lock(&journal->j_state_lock);
@@ -1240,7 +1336,7 @@ static void jbd2_journal_update_sb_errno(journal_t *journal)
1240	sb->s_errno = cpu_to_be32(journal->j_errno);	1336	sb->s_errno = cpu_to_be32(journal->j_errno);
1241	read_unlock(&journal->j_state_lock);	1337	read_unlock(&journal->j_state_lock);
1242		1338
1243	jbd2_write_superblock(journal);	1339	jbd2_write_superblock(journal, WRITE_SYNC);
1244	}	1340	}
1245		1341
1246	/*	1342	/*


diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index da6d7baf1390..c1a03354a22f 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c
@@ -21,6 +21,7 @@
21	#include <linux/jbd2.h>	21	#include <linux/jbd2.h>
22	#include <linux/errno.h>	22	#include <linux/errno.h>
23	#include <linux/crc32.h>	23	#include <linux/crc32.h>
		24	#include <linux/blkdev.h>
24	#endif	25	#endif
25		26
26	/*	27	/*
@@ -265,7 +266,9 @@ int jbd2_journal_recover(journal_t *journal)
265	err2 = sync_blockdev(journal->j_fs_dev);	266	err2 = sync_blockdev(journal->j_fs_dev);
266	if (!err)	267	if (!err)
267	err = err2;	268	err = err2;
268		269	/* Make sure all replayed data is on permanent storage */
		270	if (journal->j_flags & JBD2_BARRIER)
		271	blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
269	return err;	272	return err;
270	}	273	}
271		274