path: root/fs/jbd
author		Ingo Molnar <mingo@elte.hu>	2008-08-14 06:19:59 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-08-14 06:19:59 -0400
commit		8d7ccaa545490cdffdfaff0842436a8dd85cf47b (patch)
tree		8129b5907161bc6ae26deb3645ce1e280c5e1f51 /fs/jbd
parent		b2139aa0eec330c711c5a279db361e5ef1178e78 (diff)
parent		30a2f3c60a84092c8084dfe788b710f8d0768cd4 (diff)
Merge commit 'v2.6.27-rc3' into x86/prototypes
Conflicts:

	include/asm-x86/dma-mapping.h

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'fs/jbd')
-rw-r--r--	fs/jbd/commit.c		68
-rw-r--r--	fs/jbd/journal.c	8
-rw-r--r--	fs/jbd/revoke.c		163
-rw-r--r--	fs/jbd/transaction.c	61
4 files changed, 185 insertions(+), 115 deletions(-)
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 5a8ca61498ca..ae08c057e751 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -36,7 +36,7 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
 
 /*
  * When an ext3-ordered file is truncated, it is possible that many pages are
- * not sucessfully freed, because they are attached to a committing transaction.
+ * not successfully freed, because they are attached to a committing transaction.
  * After the transaction commits, these pages are left on the LRU, with no
  * ->mapping, and with attached buffers.  These pages are trivially reclaimable
  * by the VM, but their apparent absence upsets the VM accounting, and it makes
@@ -45,8 +45,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
  * So here, we have a buffer which has just come off the forget list.  Look to
  * see if we can strip all buffers from the backing page.
  *
- * Called under lock_journal(), and possibly under journal_datalist_lock.  The
- * caller provided us with a ref against the buffer, and we drop that here.
+ * Called under journal->j_list_lock.  The caller provided us with a ref
+ * against the buffer, and we drop that here.
  */
 static void release_buffer_page(struct buffer_head *bh)
 {
@@ -63,7 +63,7 @@ static void release_buffer_page(struct buffer_head *bh)
 		goto nope;
 
 	/* OK, it's a truncated page */
-	if (TestSetPageLocked(page))
+	if (!trylock_page(page))
 		goto nope;
 
 	page_cache_get(page);
@@ -78,6 +78,19 @@ nope:
 }
 
 /*
+ * Decrement reference counter for data buffer. If it has been marked
+ * 'BH_Freed', release it and the page to which it belongs if possible.
+ */
+static void release_data_buffer(struct buffer_head *bh)
+{
+	if (buffer_freed(bh)) {
+		clear_buffer_freed(bh);
+		release_buffer_page(bh);
+	} else
+		put_bh(bh);
+}
+
+/*
  * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
  * held.  For ranking reasons we must trylock.  If we lose, schedule away and
  * return 0.  j_list_lock is dropped in this case.
@@ -172,7 +185,7 @@ static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
 /*
  * Submit all the data buffers to disk
  */
-static void journal_submit_data_buffers(journal_t *journal,
+static int journal_submit_data_buffers(journal_t *journal,
 				transaction_t *commit_transaction)
 {
 	struct journal_head *jh;
@@ -180,6 +193,7 @@ static void journal_submit_data_buffers(journal_t *journal,
 	int locked;
 	int bufs = 0;
 	struct buffer_head **wbuf = journal->j_wbuf;
+	int err = 0;
 
 	/*
 	 * Whenever we unlock the journal and sleep, things can get added
@@ -207,7 +221,7 @@ write_out_data:
 		 * blocking lock_buffer().
 		 */
 		if (buffer_dirty(bh)) {
-			if (test_set_buffer_locked(bh)) {
+			if (!trylock_buffer(bh)) {
 				BUFFER_TRACE(bh, "needs blocking lock");
 				spin_unlock(&journal->j_list_lock);
 				/* Write out all data to prevent deadlocks */
@@ -231,7 +245,7 @@ write_out_data:
 			if (locked)
 				unlock_buffer(bh);
 			BUFFER_TRACE(bh, "already cleaned up");
-			put_bh(bh);
+			release_data_buffer(bh);
 			continue;
 		}
 		if (locked && test_clear_buffer_dirty(bh)) {
@@ -253,15 +267,17 @@ write_out_data:
 			put_bh(bh);
 		} else {
 			BUFFER_TRACE(bh, "writeout complete: unfile");
+			if (unlikely(!buffer_uptodate(bh)))
+				err = -EIO;
 			__journal_unfile_buffer(jh);
 			jbd_unlock_bh_state(bh);
 			if (locked)
 				unlock_buffer(bh);
 			journal_remove_journal_head(bh);
-			/* Once for our safety reference, once for
+			/* One for our safety reference, other for
 			 * journal_remove_journal_head() */
 			put_bh(bh);
-			put_bh(bh);
+			release_data_buffer(bh);
 		}
 
 		if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
@@ -271,6 +287,8 @@ write_out_data:
 	}
 	spin_unlock(&journal->j_list_lock);
 	journal_do_submit_data(wbuf, bufs);
+
+	return err;
 }
 
 /*
@@ -410,8 +428,7 @@ void journal_commit_transaction(journal_t *journal)
 	 * Now start flushing things to disk, in the order they appear
 	 * on the transaction lists.  Data blocks go first.
 	 */
-	err = 0;
-	journal_submit_data_buffers(journal, commit_transaction);
+	err = journal_submit_data_buffers(journal, commit_transaction);
 
 	/*
 	 * Wait for all previously submitted IO to complete.
@@ -426,10 +443,21 @@ void journal_commit_transaction(journal_t *journal)
 		if (buffer_locked(bh)) {
 			spin_unlock(&journal->j_list_lock);
 			wait_on_buffer(bh);
-			if (unlikely(!buffer_uptodate(bh)))
-				err = -EIO;
 			spin_lock(&journal->j_list_lock);
 		}
+		if (unlikely(!buffer_uptodate(bh))) {
+			if (!trylock_page(bh->b_page)) {
+				spin_unlock(&journal->j_list_lock);
+				lock_page(bh->b_page);
+				spin_lock(&journal->j_list_lock);
+			}
+			if (bh->b_page->mapping)
+				set_bit(AS_EIO, &bh->b_page->mapping->flags);
+
+			unlock_page(bh->b_page);
+			SetPageError(bh->b_page);
+			err = -EIO;
+		}
 		if (!inverted_lock(journal, bh)) {
 			put_bh(bh);
 			spin_lock(&journal->j_list_lock);
@@ -443,17 +471,21 @@ void journal_commit_transaction(journal_t *journal)
 		} else {
 			jbd_unlock_bh_state(bh);
 		}
-		put_bh(bh);
+		release_data_buffer(bh);
 		cond_resched_lock(&journal->j_list_lock);
 	}
 	spin_unlock(&journal->j_list_lock);
 
-	if (err)
-		journal_abort(journal, err);
+	if (err) {
+		char b[BDEVNAME_SIZE];
 
-	journal_write_revoke_records(journal, commit_transaction);
+		printk(KERN_WARNING
+			"JBD: Detected IO errors while flushing file data "
+			"on %s\n", bdevname(journal->j_fs_dev, b));
+		err = 0;
+	}
 
-	jbd_debug(3, "JBD: commit phase 2\n");
+	journal_write_revoke_records(journal, commit_transaction);
 
 	/*
 	 * If we found any dirty or locked buffers, then we should have
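The commit.c hunks above do two things: journal_submit_data_buffers() now returns an IO error (reported with a printk rather than aborting the journal), and the final put_bh() calls are routed through the new release_data_buffer() helper so that truncated ('BH_Freed') buffers also get their backing page stripped on the last put. A minimal userspace C sketch of that last-put pattern follows; struct buf, put_buf() and the freed field are invented stand-ins for buffer_head, put_bh() and the BH_Freed bit, not kernel API.

#include <stdio.h>
#include <stdlib.h>

struct buf {
	int refcount;
	int freed;			/* models the BH_Freed bit */
};

/* models put_bh(): drop one reference, free storage at zero */
static void put_buf(struct buf *b)
{
	if (--b->refcount == 0)
		free(b);
}

/* models release_buffer_page(): try to strip the backing page, then drop ref */
static void release_buffer_page(struct buf *b)
{
	/* the kernel code tries trylock_page()/try_to_free_buffers() here */
	put_buf(b);
}

/* models release_data_buffer(): route the final put through the page path */
static void release_data_buffer(struct buf *b)
{
	if (b->freed) {
		b->freed = 0;		/* models clear_buffer_freed() */
		release_buffer_page(b);
	} else {
		put_buf(b);
	}
}

int main(void)
{
	struct buf *b = calloc(1, sizeof(*b));

	if (!b)
		return 1;
	b->refcount = 1;
	b->freed = 1;			/* pretend truncate marked it freed */
	release_data_buffer(b);
	puts("freed buffer released via page-release path");
	return 0;
}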
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index b99c3b3654c4..aa7143a8349b 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -68,7 +68,6 @@ EXPORT_SYMBOL(journal_set_features);
 EXPORT_SYMBOL(journal_create);
 EXPORT_SYMBOL(journal_load);
 EXPORT_SYMBOL(journal_destroy);
-EXPORT_SYMBOL(journal_update_superblock);
 EXPORT_SYMBOL(journal_abort);
 EXPORT_SYMBOL(journal_errno);
 EXPORT_SYMBOL(journal_ack_err);
@@ -1636,9 +1635,10 @@ static int journal_init_journal_head_cache(void)
 
 static void journal_destroy_journal_head_cache(void)
 {
-	J_ASSERT(journal_head_cache != NULL);
-	kmem_cache_destroy(journal_head_cache);
-	journal_head_cache = NULL;
+	if (journal_head_cache) {
+		kmem_cache_destroy(journal_head_cache);
+		journal_head_cache = NULL;
+	}
 }
 
 /*
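The journal.c change makes cache teardown idempotent: instead of asserting that the cache exists, kmem_cache_destroy() is called only when journal_head_cache is non-NULL, and the pointer is cleared so a repeat call is harmless. A minimal userspace sketch of the same guard pattern, with free() standing in for kmem_cache_destroy():

#include <stdio.h>
#include <stdlib.h>

static void *journal_head_cache;	/* stand-in for the kmem cache pointer */

static void destroy_cache(void)
{
	if (journal_head_cache) {		/* guard replaces the J_ASSERT */
		free(journal_head_cache);
		journal_head_cache = NULL;	/* second call is now a no-op */
	}
}

int main(void)
{
	destroy_cache();		/* safe even though nothing was created */
	journal_head_cache = malloc(32);
	destroy_cache();
	destroy_cache();		/* idempotent: no double free */
	puts("teardown ran three times without error");
	return 0;
}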
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 1bb43e987f4b..c7bd649bbbdc 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -166,138 +166,123 @@ static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal,
 	return NULL;
 }
 
+void journal_destroy_revoke_caches(void)
+{
+	if (revoke_record_cache) {
+		kmem_cache_destroy(revoke_record_cache);
+		revoke_record_cache = NULL;
+	}
+	if (revoke_table_cache) {
+		kmem_cache_destroy(revoke_table_cache);
+		revoke_table_cache = NULL;
+	}
+}
+
 int __init journal_init_revoke_caches(void)
 {
+	J_ASSERT(!revoke_record_cache);
+	J_ASSERT(!revoke_table_cache);
+
 	revoke_record_cache = kmem_cache_create("revoke_record",
 					   sizeof(struct jbd_revoke_record_s),
 					   0,
 					   SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
 					   NULL);
 	if (!revoke_record_cache)
-		return -ENOMEM;
+		goto record_cache_failure;
 
 	revoke_table_cache = kmem_cache_create("revoke_table",
 					   sizeof(struct jbd_revoke_table_s),
 					   0, SLAB_TEMPORARY, NULL);
-	if (!revoke_table_cache) {
-		kmem_cache_destroy(revoke_record_cache);
-		revoke_record_cache = NULL;
-		return -ENOMEM;
-	}
+	if (!revoke_table_cache)
+		goto table_cache_failure;
+
 	return 0;
-}
 
-void journal_destroy_revoke_caches(void)
-{
-	kmem_cache_destroy(revoke_record_cache);
-	revoke_record_cache = NULL;
-	kmem_cache_destroy(revoke_table_cache);
-	revoke_table_cache = NULL;
+table_cache_failure:
+	journal_destroy_revoke_caches();
+record_cache_failure:
+	return -ENOMEM;
 }
 
-/* Initialise the revoke table for a given journal to a given size. */
-
-int journal_init_revoke(journal_t *journal, int hash_size)
+static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size)
 {
-	int shift, tmp;
+	int shift = 0;
+	int tmp = hash_size;
+	struct jbd_revoke_table_s *table;
 
-	J_ASSERT (journal->j_revoke_table[0] == NULL);
+	table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
+	if (!table)
+		goto out;
 
-	shift = 0;
-	tmp = hash_size;
 	while((tmp >>= 1UL) != 0UL)
 		shift++;
 
-	journal->j_revoke_table[0] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
-	if (!journal->j_revoke_table[0])
-		return -ENOMEM;
-	journal->j_revoke = journal->j_revoke_table[0];
-
-	/* Check that the hash_size is a power of two */
-	J_ASSERT(is_power_of_2(hash_size));
-
-	journal->j_revoke->hash_size = hash_size;
-
-	journal->j_revoke->hash_shift = shift;
-
-	journal->j_revoke->hash_table =
+	table->hash_size = hash_size;
+	table->hash_shift = shift;
+	table->hash_table =
 		kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
-	if (!journal->j_revoke->hash_table) {
-		kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
-		journal->j_revoke = NULL;
-		return -ENOMEM;
+	if (!table->hash_table) {
+		kmem_cache_free(revoke_table_cache, table);
+		table = NULL;
+		goto out;
 	}
 
 	for (tmp = 0; tmp < hash_size; tmp++)
-		INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
+		INIT_LIST_HEAD(&table->hash_table[tmp]);
 
-	journal->j_revoke_table[1] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
-	if (!journal->j_revoke_table[1]) {
-		kfree(journal->j_revoke_table[0]->hash_table);
-		kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
-		return -ENOMEM;
+out:
+	return table;
+}
+
+static void journal_destroy_revoke_table(struct jbd_revoke_table_s *table)
+{
+	int i;
+	struct list_head *hash_list;
+
+	for (i = 0; i < table->hash_size; i++) {
+		hash_list = &table->hash_table[i];
+		J_ASSERT(list_empty(hash_list));
 	}
 
-	journal->j_revoke = journal->j_revoke_table[1];
+	kfree(table->hash_table);
+	kmem_cache_free(revoke_table_cache, table);
+}
 
-	/* Check that the hash_size is a power of two */
+/* Initialise the revoke table for a given journal to a given size. */
+int journal_init_revoke(journal_t *journal, int hash_size)
+{
+	J_ASSERT(journal->j_revoke_table[0] == NULL);
 	J_ASSERT(is_power_of_2(hash_size));
 
-	journal->j_revoke->hash_size = hash_size;
+	journal->j_revoke_table[0] = journal_init_revoke_table(hash_size);
+	if (!journal->j_revoke_table[0])
+		goto fail0;
 
-	journal->j_revoke->hash_shift = shift;
+	journal->j_revoke_table[1] = journal_init_revoke_table(hash_size);
+	if (!journal->j_revoke_table[1])
+		goto fail1;
 
-	journal->j_revoke->hash_table =
-		kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
-	if (!journal->j_revoke->hash_table) {
-		kfree(journal->j_revoke_table[0]->hash_table);
-		kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
-		kmem_cache_free(revoke_table_cache, journal->j_revoke_table[1]);
-		journal->j_revoke = NULL;
-		return -ENOMEM;
-	}
-
-	for (tmp = 0; tmp < hash_size; tmp++)
-		INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
+	journal->j_revoke = journal->j_revoke_table[1];
 
 	spin_lock_init(&journal->j_revoke_lock);
 
 	return 0;
-}
 
-/* Destoy a journal's revoke table.  The table must already be empty! */
+fail1:
+	journal_destroy_revoke_table(journal->j_revoke_table[0]);
+fail0:
+	return -ENOMEM;
+}
 
+/* Destroy a journal's revoke table.  The table must already be empty! */
 void journal_destroy_revoke(journal_t *journal)
 {
-	struct jbd_revoke_table_s *table;
-	struct list_head *hash_list;
-	int i;
-
-	table = journal->j_revoke_table[0];
-	if (!table)
-		return;
-
-	for (i=0; i<table->hash_size; i++) {
-		hash_list = &table->hash_table[i];
-		J_ASSERT (list_empty(hash_list));
-	}
-
-	kfree(table->hash_table);
-	kmem_cache_free(revoke_table_cache, table);
-	journal->j_revoke = NULL;
-
-	table = journal->j_revoke_table[1];
-	if (!table)
-		return;
-
-	for (i=0; i<table->hash_size; i++) {
-		hash_list = &table->hash_table[i];
-		J_ASSERT (list_empty(hash_list));
-	}
-
-	kfree(table->hash_table);
-	kmem_cache_free(revoke_table_cache, table);
 	journal->j_revoke = NULL;
+	if (journal->j_revoke_table[0])
+		journal_destroy_revoke_table(journal->j_revoke_table[0]);
+	if (journal->j_revoke_table[1])
+		journal_destroy_revoke_table(journal->j_revoke_table[1]);
 }
 
 
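The revoke.c refactor pulls table setup and teardown into journal_init_revoke_table()/journal_destroy_revoke_table() and unwinds failures through goto labels, so each error path frees only what has already been allocated. A minimal userspace sketch of that fail1/fail0 shape; alloc_table(), free_table() and init_pair() are illustrative stand-ins, not the kernel functions:

#include <stdlib.h>

struct table {
	int *slots;
};

static struct table *alloc_table(int n)
{
	struct table *t = malloc(sizeof(*t));

	if (!t)
		return NULL;
	t->slots = calloc(n, sizeof(int));
	if (!t->slots) {
		free(t);
		return NULL;
	}
	return t;
}

static void free_table(struct table *t)
{
	free(t->slots);
	free(t);
}

/* two-stage init with goto unwinding, mirroring journal_init_revoke() */
static int init_pair(struct table **t0, struct table **t1, int n)
{
	*t0 = alloc_table(n);
	if (!*t0)
		goto fail0;

	*t1 = alloc_table(n);
	if (!*t1)
		goto fail1;

	return 0;

fail1:
	free_table(*t0);	/* undo only the allocation that succeeded */
fail0:
	return -1;		/* the kernel code returns -ENOMEM here */
}

int main(void)
{
	struct table *a = NULL, *b = NULL;

	if (init_pair(&a, &b, 16))
		return 1;
	free_table(a);
	free_table(b);
	return 0;
}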
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 67ff2024c23c..0540ca27a446 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -291,7 +291,7 @@ handle_t *journal_start(journal_t *journal, int nblocks)
 		goto out;
 	}
 
-	lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+	lock_map_acquire(&handle->h_lockdep_map);
 
 out:
 	return handle;
@@ -1448,7 +1448,7 @@ int journal_stop(handle_t *handle)
 		spin_unlock(&journal->j_state_lock);
 	}
 
-	lock_release(&handle->h_lockdep_map, 1, _THIS_IP_);
+	lock_map_release(&handle->h_lockdep_map);
 
 	jbd_free_handle(handle);
 	return err;
@@ -1648,12 +1648,42 @@ out:
 	return;
 }
 
+/*
+ * journal_try_to_free_buffers() could race with journal_commit_transaction().
+ * The latter might still hold a reference count on buffers while inspecting
+ * them on t_syncdata_list or t_locked_list.
+ *
+ * journal_try_to_free_buffers() will call this function to
+ * wait for the current transaction to finish syncing data buffers, before
+ * trying to free that buffer.
+ *
+ * Takes and releases journal->j_state_lock.
+ */
+static void journal_wait_for_transaction_sync_data(journal_t *journal)
+{
+	transaction_t *transaction = NULL;
+	tid_t tid;
+
+	spin_lock(&journal->j_state_lock);
+	transaction = journal->j_committing_transaction;
+
+	if (!transaction) {
+		spin_unlock(&journal->j_state_lock);
+		return;
+	}
+
+	tid = transaction->t_tid;
+	spin_unlock(&journal->j_state_lock);
+	log_wait_commit(journal, tid);
+}
 
 /**
  * int journal_try_to_free_buffers() - try to free page buffers.
  * @journal: journal for operation
  * @page: to try and free
- * @unused_gfp_mask: unused
+ * @gfp_mask: we use the mask to detect how hard we should try to release
+ * buffers. If __GFP_WAIT and __GFP_FS are set, we wait for commit code to
+ * release the buffers.
  *
  *
  * For all the buffers on this page,
@@ -1682,9 +1712,11 @@ out:
  * journal_try_to_free_buffer() is changing its state.  But that
  * cannot happen because we never reallocate freed data as metadata
  * while the data is part of a transaction.  Yes?
+ *
+ * Return 0 on failure, 1 on success
  */
 int journal_try_to_free_buffers(journal_t *journal,
-				struct page *page, gfp_t unused_gfp_mask)
+				struct page *page, gfp_t gfp_mask)
 {
 	struct buffer_head *head;
 	struct buffer_head *bh;
@@ -1713,7 +1745,28 @@ int journal_try_to_free_buffers(journal_t *journal,
 		if (buffer_jbd(bh))
 			goto busy;
 	} while ((bh = bh->b_this_page) != head);
+
 	ret = try_to_free_buffers(page);
+
+	/*
+	 * There are a number of places where journal_try_to_free_buffers()
+	 * could race with journal_commit_transaction(): the latter still
+	 * holds a reference to the buffers while processing them, so
+	 * try_to_free_buffers() fails to free them. Some callers of
+	 * releasepage() request that page buffers be dropped, and otherwise
+	 * treat a failure to free as an error (such as generic_file_direct_IO()).
+	 *
+	 * So, if the caller of try_to_release_page() wants the synchronous
+	 * behaviour (i.e. make sure buffers are dropped upon return),
+	 * let's wait for the current transaction to finish flushing
+	 * dirty data buffers, then try to free those buffers again,
+	 * with the journal locked.
+	 */
+	if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
+		journal_wait_for_transaction_sync_data(journal);
+		ret = try_to_free_buffers(page);
+	}
+
 busy:
 	return ret;
 }
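The transaction.c change keys the blocking retry off the caller's allocation mask: journal_try_to_free_buffers() waits for the committing transaction only when both __GFP_WAIT and __GFP_FS are set, i.e. when the caller may sleep and may re-enter the filesystem. A userspace sketch of that gate; the flag values and helper names below are invented for illustration, not kernel API:

#include <stdio.h>

#define MY_GFP_WAIT	0x1	/* caller may sleep */
#define MY_GFP_FS	0x2	/* caller may re-enter the filesystem */

static int tries;

static int try_free(void)	/* stands in for try_to_free_buffers() */
{
	/* pretend the committing transaction held a ref the first time */
	return tries++ ? 1 : 0;
}

static void wait_for_commit(void)	/* stands in for log_wait_commit() */
{
	puts("waited for the committing transaction");
}

static int release_page(unsigned int gfp_mask)
{
	int ret = try_free();

	/* retry synchronously only when the caller can tolerate blocking */
	if (ret == 0 && (gfp_mask & MY_GFP_WAIT) && (gfp_mask & MY_GFP_FS)) {
		wait_for_commit();
		ret = try_free();
	}
	return ret;
}

int main(void)
{
	printf("released: %d\n", release_page(MY_GFP_WAIT | MY_GFP_FS));
	return 0;
}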