3 files changed, 64 insertions, 20 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 42895d369458..9203c3332f17 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -94,7 +94,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
        int ret = 0;
        struct buffer_head *bh = jh2bh(jh);
-        if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
+        if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
+            !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
                JBUFFER_TRACE(jh, "remove from checkpoint list");
                ret = __jbd2_journal_remove_checkpoint(jh) + 1;
                jbd_unlock_bh_state(bh);
@@ -176,21 +177,25 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
 * buffers. Note that we take the buffers in the opposite ordering
 * from the one in which they were submitted for IO.
 *
+ * Return 0 on success, and return <0 if some buffers have failed
+ * to be written out.
+ *
 * Called with j_list_lock held.
 */
-static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
+static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
 {
        struct journal_head *jh;
        struct buffer_head *bh;
        tid_t this_tid;
        int released = 0;
+        int ret = 0;
        this_tid = transaction->t_tid;
 restart:
        /* Did somebody clean up the transaction in the meanwhile? */
        if (journal->j_checkpoint_transactions != transaction ||
                        transaction->t_tid != this_tid)
-                return;
+                return ret;
        while (!released && transaction->t_checkpoint_io_list) {
                jh = transaction->t_checkpoint_io_list;
                bh = jh2bh(jh);
@@ -210,6 +215,9 @@ restart:
                        spin_lock(&journal->j_list_lock);
                        goto restart;
                }
+                if (unlikely(buffer_write_io_error(bh)))
+                        ret = -EIO;
                /*
                 * Now in whatever state the buffer currently is, we know that
                 * it has been written out and so we can drop it from the list
@@ -219,6 +227,8 @@ restart:
                jbd2_journal_remove_journal_head(bh);
                __brelse(bh);
        }
+        return ret;
 }
 #define NR_BATCH        64
@@ -242,7 +252,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
 * Try to flush one buffer from the checkpoint list to disk.
 *
 * Return 1 if something happened which requires us to abort the current
- * scan of the checkpoint list.
+ * scan of the checkpoint list.  Return <0 if the buffer has failed to
+ * be written out.
 *
 * Called with j_list_lock held and drops it if 1 is returned
 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
@@ -274,6 +285,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
                jbd2_log_wait_commit(journal, tid);
                ret = 1;
        } else if (!buffer_dirty(bh)) {
+                ret = 1;
+                if (unlikely(buffer_write_io_error(bh)))
+                        ret = -EIO;
                J_ASSERT_JH(jh, !buffer_jbddirty(bh));
                BUFFER_TRACE(bh, "remove from checkpoint");
                __jbd2_journal_remove_checkpoint(jh);
@@ -281,7 +295,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
                jbd_unlock_bh_state(bh);
                jbd2_journal_remove_journal_head(bh);
                __brelse(bh);
-                ret = 1;
        } else {
                /*
                 * Important: we are about to write the buffer, and
@@ -314,6 +327,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
 * to disk. We submit larger chunks of data at once.
 *
 * The journal should be locked before calling this function.
+ * Called with j_checkpoint_mutex held.
 */
 int jbd2_log_do_checkpoint(journal_t *journal)
 {
@@ -339,6 +353,7 @@ int jbd2_log_do_checkpoint(journal_t *journal)
         * OK, we need to start writing disk blocks.  Take one transaction
         * and write it.
         */
+        result = 0;
        spin_lock(&journal->j_list_lock);
        if (!journal->j_checkpoint_transactions)
                goto out;
@@ -357,7 +372,7 @@ restart:
                int batch_count = 0;
                struct buffer_head *bhs[NR_BATCH];
                struct journal_head *jh;
-                int retry = 0;
+                int retry = 0, err;
                while (!retry && transaction->t_checkpoint_list) {
                        struct buffer_head *bh;
@@ -371,6 +386,8 @@ restart:
                        }
                        retry = __process_buffer(journal, jh, bhs, &batch_count,
                                                 transaction);
+                        if (retry < 0 && !result)
+                                result = retry;
                        if (!retry && (need_resched() ||
                                spin_needbreak(&journal->j_list_lock))) {
                                spin_unlock(&journal->j_list_lock);
@@ -395,14 +412,18 @@ restart:
                 * Now we have cleaned up the first transaction's checkpoint
                 * list. Let's clean up the second one
                 */
-                __wait_cp_io(journal, transaction);
+                err = __wait_cp_io(journal, transaction);
+                if (!result)
+                        result = err;
        }
 out:
        spin_unlock(&journal->j_list_lock);
-        result = jbd2_cleanup_journal_tail(journal);
        if (result < 0)
-                return result;
+                jbd2_journal_abort(journal, result);
-        return 0;
+        else
+                result = jbd2_cleanup_journal_tail(journal);
+        return (result < 0) ? result : 0;
 }
 /*
@@ -418,8 +439,9 @@ out:
 * This is the only part of the journaling code which really needs to be
 * aware of transaction aborts.  Checkpointing involves writing to the
 * main filesystem area rather than to the journal, so it can proceed
- * even in abort state, but we must not update the journal superblock if
+ * even in abort state, but we must not update the super block if
- * we have an abort error outstanding.
+ * checkpointing may have failed.  Otherwise, we would lose some metadata
+ * buffers which should be written-back to the filesystem.
 */
 int jbd2_cleanup_journal_tail(journal_t *journal)
@@ -428,6 +450,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
        tid_t           first_tid;
        unsigned long   blocknr, freed;
+        if (is_journal_aborted(journal))
+                return 1;
        /* OK, work out the oldest transaction remaining in the log, and
         * the log block it starts at.
         *
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 01c3901c3a07..783de118de92 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1451,9 +1451,12 @@ recovery_error:
 *
 * Release a journal_t structure once it is no longer in use by the
 * journaled object.
+ * Return <0 if we couldn't clean up the journal.
 */
-void jbd2_journal_destroy(journal_t *journal)
+int jbd2_journal_destroy(journal_t *journal)
 {
+        int err = 0;
        /* Wait for the commit thread to wake up and die. */
        journal_kill_thread(journal);
@@ -1476,11 +1479,16 @@ void jbd2_journal_destroy(journal_t *journal)
        J_ASSERT(journal->j_checkpoint_transactions == NULL);
        spin_unlock(&journal->j_list_lock);
-        /* We can now mark the journal as empty. */
-        journal->j_tail = 0;
-        journal->j_tail_sequence = ++journal->j_transaction_sequence;
        if (journal->j_sb_buffer) {
-                jbd2_journal_update_superblock(journal, 1);
+                if (!is_journal_aborted(journal)) {
+                        /* We can now mark the journal as empty. */
+                        journal->j_tail = 0;
+                        journal->j_tail_sequence =
+                                ++journal->j_transaction_sequence;
+                        jbd2_journal_update_superblock(journal, 1);
+                } else {
+                        err = -EIO;
+                }
                brelse(journal->j_sb_buffer);
        }
@@ -1492,6 +1500,8 @@ void jbd2_journal_destroy(journal_t *journal)
                jbd2_journal_destroy_revoke(journal);
        kfree(journal->j_wbuf);
        kfree(journal);
+        return err;
 }
@@ -1717,10 +1727,16 @@ int jbd2_journal_flush(journal_t *journal)
        spin_lock(&journal->j_list_lock);
        while (!err && journal->j_checkpoint_transactions != NULL) {
                spin_unlock(&journal->j_list_lock);
+                mutex_lock(&journal->j_checkpoint_mutex);
                err = jbd2_log_do_checkpoint(journal);
+                mutex_unlock(&journal->j_checkpoint_mutex);
                spin_lock(&journal->j_list_lock);
        }
        spin_unlock(&journal->j_list_lock);
+        if (is_journal_aborted(journal))
+                return -EIO;
        jbd2_cleanup_journal_tail(journal);
        /* Finally, mark the journal as really needing no recovery.
@@ -1742,7 +1758,7 @@ int jbd2_journal_flush(journal_t *journal)
        J_ASSERT(journal->j_head == journal->j_tail);
        J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
        spin_unlock(&journal->j_state_lock);
-        return err;
+        return 0;
 }
 /**
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 058f50f65b76..73063285b13f 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -225,7 +225,7 @@ do {									\
 */
 int jbd2_journal_recover(journal_t *journal)
 {
-        int                     err;
+        int                     err, err2;
        journal_superblock_t *  sb;
        struct recovery_info    info;
@@ -263,7 +263,10 @@ int jbd2_journal_recover(journal_t *journal)
        journal->j_transaction_sequence = ++info.end_transaction;
        jbd2_journal_clear_revoke(journal);
-        sync_blockdev(journal->j_fs_dev);
+        err2 = sync_blockdev(journal->j_fs_dev);
+        if (!err)
+                err = err2;
        return err;
 }