Diffstat (limited to 'fs/jbd2')
-rw-r--r--  fs/jbd2/commit.c       12
-rw-r--r--  fs/jbd2/journal.c      41
-rw-r--r--  fs/jbd2/transaction.c  75
3 files changed, 71 insertions(+), 57 deletions(-)
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 7b4088b2364d..26d991ddc1e6 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -25,6 +25,7 @@
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
 #include <linux/bio.h>
+#include <linux/blkdev.h>
 #include <trace/events/jbd2.h>
 
 /*
@@ -133,8 +134,8 @@ static int journal_submit_commit_record(journal_t *journal,
 	bh->b_end_io = journal_end_buffer_io_sync;
 
 	if (journal->j_flags & JBD2_BARRIER &&
-		!JBD2_HAS_INCOMPAT_FEATURE(journal,
-					 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
+	    !JBD2_HAS_INCOMPAT_FEATURE(journal,
+				       JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
 		set_buffer_ordered(bh);
 		barrier_done = 1;
 	}
@@ -220,7 +221,6 @@ static int journal_submit_inode_data_buffers(struct address_space *mapping)
 		.nr_to_write = mapping->nrpages * 2,
 		.range_start = 0,
 		.range_end = i_size_read(mapping->host),
-		.for_writepages = 1,
 	};
 
 	ret = generic_writepages(mapping, &wbc);
@@ -707,11 +707,13 @@ start_journal_io:
 	/* Done it all: now write the commit record asynchronously. */
 
 	if (JBD2_HAS_INCOMPAT_FEATURE(journal,
-		JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
+				      JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
 		err = journal_submit_commit_record(journal, commit_transaction,
 						 &cbh, crc32_sum);
 		if (err)
 			__jbd2_journal_abort_hard(journal);
+		if (journal->j_flags & JBD2_BARRIER)
+			blkdev_issue_flush(journal->j_dev, NULL);
 	}
 
 	/*
@@ -834,7 +836,7 @@ wait_for_iobuf:
 	jbd_debug(3, "JBD: commit phase 5\n");
 
 	if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
-		JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
+				       JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
 		err = journal_submit_commit_record(journal, commit_transaction,
 						&cbh, crc32_sum);
 		if (err)
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 18bfd5dab642..53b86e16e5fe 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -297,6 +297,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
 	unsigned int new_offset;
 	struct buffer_head *bh_in = jh2bh(jh_in);
 	struct jbd2_buffer_trigger_type *triggers;
+	journal_t *journal = transaction->t_journal;
 
 	/*
 	 * The buffer really shouldn't be locked: only the current committing
@@ -310,6 +311,11 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
 	J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
 
 	new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
+	/* keep subsequent assertions sane */
+	new_bh->b_state = 0;
+	init_buffer(new_bh, NULL, NULL);
+	atomic_set(&new_bh->b_count, 1);
+	new_jh = jbd2_journal_add_journal_head(new_bh);	/* This sleeps */
 
 	/*
 	 * If a new transaction has already done a buffer copy-out, then
@@ -388,14 +394,6 @@ repeat:
 		kunmap_atomic(mapped_data, KM_USER0);
 	}
 
-	/* keep subsequent assertions sane */
-	new_bh->b_state = 0;
-	init_buffer(new_bh, NULL, NULL);
-	atomic_set(&new_bh->b_count, 1);
-	jbd_unlock_bh_state(bh_in);
-
-	new_jh = jbd2_journal_add_journal_head(new_bh);	/* This sleeps */
-
 	set_bh_page(new_bh, new_page, new_offset);
 	new_jh->b_transaction = NULL;
 	new_bh->b_size = jh2bh(jh_in)->b_size;
@@ -412,7 +410,11 @@ repeat:
 	 * copying is moved to the transaction's shadow queue.
 	 */
 	JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
-	jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
+	spin_lock(&journal->j_list_lock);
+	__jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
+	spin_unlock(&journal->j_list_lock);
+	jbd_unlock_bh_state(bh_in);
+
 	JBUFFER_TRACE(new_jh, "file as BJ_IO");
 	jbd2_journal_file_buffer(new_jh, transaction, BJ_IO);
 
@@ -766,7 +768,7 @@ static void jbd2_seq_history_stop(struct seq_file *seq, void *v)
 {
 }
 
-static struct seq_operations jbd2_seq_history_ops = {
+static const struct seq_operations jbd2_seq_history_ops = {
 	.start  = jbd2_seq_history_start,
 	.next   = jbd2_seq_history_next,
 	.stop   = jbd2_seq_history_stop,
@@ -870,7 +872,7 @@ static void jbd2_seq_info_stop(struct seq_file *seq, void *v)
 {
 }
 
-static struct seq_operations jbd2_seq_info_ops = {
+static const struct seq_operations jbd2_seq_info_ops = {
 	.start  = jbd2_seq_info_start,
 	.next   = jbd2_seq_info_next,
 	.stop   = jbd2_seq_info_stop,
@@ -1185,6 +1187,12 @@ static int journal_reset(journal_t *journal)
 
 	first = be32_to_cpu(sb->s_first);
 	last = be32_to_cpu(sb->s_maxlen);
+	if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) {
+		printk(KERN_ERR "JBD: Journal too short (blocks %llu-%llu).\n",
+		       first, last);
+		journal_fail_superblock(journal);
+		return -EINVAL;
+	}
 
 	journal->j_first = first;
 	journal->j_last = last;
@@ -2410,6 +2418,7 @@ const char *jbd2_dev_to_name(dev_t device)
 	int	i = hash_32(device, CACHE_SIZE_BITS);
 	char	*ret;
 	struct block_device *bd;
+	static struct devname_cache *new_dev;
 
 	rcu_read_lock();
 	if (devcache[i] && devcache[i]->device == device) {
@@ -2419,20 +2428,20 @@ const char *jbd2_dev_to_name(dev_t device)
 	}
 	rcu_read_unlock();
 
+	new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
+	if (!new_dev)
+		return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
 	spin_lock(&devname_cache_lock);
 	if (devcache[i]) {
 		if (devcache[i]->device == device) {
+			kfree(new_dev);
 			ret = devcache[i]->devname;
 			spin_unlock(&devname_cache_lock);
 			return ret;
 		}
 		call_rcu(&devcache[i]->rcu, free_devcache);
 	}
-	devcache[i] = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
-	if (!devcache[i]) {
-		spin_unlock(&devname_cache_lock);
-		return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
-	}
+	devcache[i] = new_dev;
 	devcache[i]->device = device;
 	bd = bdget(device);
 	if (bd) {
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 494501edba6b..a0512700542f 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -57,7 +57,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
 	INIT_LIST_HEAD(&transaction->t_private_list);
 
 	/* Set up the commit timer for the new transaction. */
-	journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
+	journal->j_commit_timer.expires = round_jiffies_up(transaction->t_expires);
 	add_timer(&journal->j_commit_timer);
 
 	J_ASSERT(journal->j_running_transaction == NULL);
@@ -238,6 +238,8 @@ repeat_locked:
 			  __jbd2_log_space_left(journal));
 	spin_unlock(&transaction->t_handle_lock);
 	spin_unlock(&journal->j_state_lock);
+
+	lock_map_acquire(&handle->h_lockdep_map);
 out:
 	if (unlikely(new_transaction))		/* It's usually NULL */
 		kfree(new_transaction);
@@ -303,8 +305,6 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
 		handle = ERR_PTR(err);
 		goto out;
 	}
-
-	lock_map_acquire(&handle->h_lockdep_map);
 out:
 	return handle;
 }
@@ -426,6 +426,7 @@ int jbd2_journal_restart(handle_t *handle, int nblocks)
 		__jbd2_log_start_commit(journal, transaction->t_tid);
 	spin_unlock(&journal->j_state_lock);
 
+	lock_map_release(&handle->h_lockdep_map);
 	handle->h_buffer_credits = nblocks;
 	ret = start_this_handle(journal, handle);
 	return ret;
@@ -499,34 +500,15 @@ void jbd2_journal_unlock_updates (journal_t *journal)
 	wake_up(&journal->j_wait_transaction_locked);
 }
 
-/*
- * Report any unexpected dirty buffers which turn up. Normally those
- * indicate an error, but they can occur if the user is running (say)
- * tune2fs to modify the live filesystem, so we need the option of
- * continuing as gracefully as possible. #
- *
- * The caller should already hold the journal lock and
- * j_list_lock spinlock: most callers will need those anyway
- * in order to probe the buffer's journaling state safely.
- */
-static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
+static void warn_dirty_buffer(struct buffer_head *bh)
 {
-	int jlist;
-
-	/* If this buffer is one which might reasonably be dirty
-	 * --- ie. data, or not part of this journal --- then
-	 * we're OK to leave it alone, but otherwise we need to
-	 * move the dirty bit to the journal's own internal
-	 * JBDDirty bit. */
-	jlist = jh->b_jlist;
-
-	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
-	    jlist == BJ_Shadow || jlist == BJ_Forget) {
-		struct buffer_head *bh = jh2bh(jh);
+	char b[BDEVNAME_SIZE];
 
-		if (test_clear_buffer_dirty(bh))
-			set_buffer_jbddirty(bh);
-	}
+	printk(KERN_WARNING
+	       "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
+	       "There's a risk of filesystem corruption in case of system "
+	       "crash.\n",
+	       bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
 }
 
 /*
@@ -593,14 +575,16 @@ repeat:
 			if (jh->b_next_transaction)
 				J_ASSERT_JH(jh, jh->b_next_transaction ==
 							transaction);
+			warn_dirty_buffer(bh);
 		}
 		/*
 		 * In any case we need to clean the dirty flag and we must
 		 * do it under the buffer lock to be sure we don't race
 		 * with running write-out.
 		 */
-		JBUFFER_TRACE(jh, "Unexpected dirty buffer");
-		jbd_unexpected_dirty_buffer(jh);
+		JBUFFER_TRACE(jh, "Journalling dirty buffer");
+		clear_buffer_dirty(bh);
+		set_buffer_jbddirty(bh);
 	}
 
 	unlock_buffer(bh);
@@ -843,6 +827,15 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
 	J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
 
 	if (jh->b_transaction == NULL) {
+		/*
+		 * Previous jbd2_journal_forget() could have left the buffer
+		 * with jbddirty bit set because it was being committed. When
+		 * the commit finished, we've filed the buffer for
+		 * checkpointing and marked it dirty. Now we are reallocating
+		 * the buffer so the transaction freeing it must have
+		 * committed and so it's safe to clear the dirty bit.
+		 */
+		clear_buffer_dirty(jh2bh(jh));
 		jh->b_transaction = transaction;
 
 		/* first access by this transaction */
@@ -1644,8 +1637,13 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
 
 	if (jh->b_cp_transaction) {
 		JBUFFER_TRACE(jh, "on running+cp transaction");
+		/*
+		 * We don't want to write the buffer anymore, clear the
+		 * bit so that we don't confuse checks in
+		 * __journal_file_buffer
+		 */
+		clear_buffer_dirty(bh);
 		__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
-		clear_buffer_jbddirty(bh);
 		may_free = 0;
 	} else {
 		JBUFFER_TRACE(jh, "on running transaction");
@@ -1896,12 +1894,17 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
 	if (jh->b_transaction && jh->b_jlist == jlist)
 		return;
 
-	/* The following list of buffer states needs to be consistent
-	 * with __jbd_unexpected_dirty_buffer()'s handling of dirty
-	 * state. */
-
 	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
 	    jlist == BJ_Shadow || jlist == BJ_Forget) {
+		/*
+		 * For metadata buffers, we track dirty bit in buffer_jbddirty
+		 * instead of buffer_dirty. We should not see a dirty bit set
+		 * here because we clear it in do_get_write_access but e.g.
+		 * tune2fs can modify the sb and set the dirty bit at any time
+		 * so we try to gracefully handle that.
+		 */
+		if (buffer_dirty(bh))
+			warn_dirty_buffer(bh);
 		if (test_clear_buffer_dirty(bh) ||
 		    test_clear_buffer_jbddirty(bh))
 			was_dirty = 1;