block: remove per-queue plugging

Code has been converted over to the new explicit on-stack plugging, and delay users have been converted to use the new API for that. So let's kill off the old plugging along with aops->sync_page().

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
author: Jens Axboe <jaxboe@fusionio.com> 2011-03-10 02:52:07 -0500
committer: Jens Axboe <jaxboe@fusionio.com> 2011-03-10 02:52:07 -0500
commit: 7eaceaccab5f40bbfda044629a6298616aeaed50 (patch)
tree: 33954d12f63e25a47eb6d86ef3d3d0a5e62bf752 /mm
parent: 73c101011926c5832e6e141682180c4debe2cf45 (diff)
10 files changed, 13 insertions, 131 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 027100d3022..c91e139a652 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -14,17 +14,11 @@
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
-void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
-}
-EXPORT_SYMBOL(default_unplug_io_fn);
 struct backing_dev_info default_backing_dev_info = {
        .name           = "default",
        .ra_pages       = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
        .state          = 0,
        .capabilities   = BDI_CAP_MAP_COPY,
-        .unplug_io_fn   = default_unplug_io_fn,
 };
 EXPORT_SYMBOL_GPL(default_backing_dev_info);
diff --git a/mm/filemap.c b/mm/filemap.c
index 83a45d35468..380776c2a9a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -155,45 +155,15 @@ void remove_from_page_cache(struct page *page)
 }
 EXPORT_SYMBOL(remove_from_page_cache);
-static int sync_page(void *word)
+static int sleep_on_page(void *word)
 {
-        struct address_space *mapping;
-        struct page *page;
-        page = container_of((unsigned long *)word, struct page, flags);
-        /*
-         * page_mapping() is being called without PG_locked held.
-         * Some knowledge of the state and use of the page is used to
-         * reduce the requirements down to a memory barrier.
-         * The danger here is of a stale page_mapping() return value
-         * indicating a struct address_space different from the one it's
-         * associated with when it is associated with one.
-         * After smp_mb(), it's either the correct page_mapping() for
-         * the page, or an old page_mapping() and the page's own
-         * page_mapping() has gone NULL.
-         * The ->sync_page() address_space operation must tolerate
-         * page_mapping() going NULL. By an amazing coincidence,
-         * this comes about because none of the users of the page
-         * in the ->sync_page() methods make essential use of the
-         * page_mapping(), merely passing the page down to the backing
-         * device's unplug functions when it's non-NULL, which in turn
-         * ignore it for all cases but swap, where only page_private(page) is
-         * of interest. When page_mapping() does go NULL, the entire
-         * call stack gracefully ignores the page and returns.
-         * -- wli
-         */
-        smp_mb();
-        mapping = page_mapping(page);
-        if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
-                mapping->a_ops->sync_page(page);
        io_schedule();
        return 0;
 }
-static int sync_page_killable(void *word)
+static int sleep_on_page_killable(void *word)
 {
-        sync_page(word);
+        sleep_on_page(word);
        return fatal_signal_pending(current) ? -EINTR : 0;
 }
@@ -479,12 +449,6 @@ struct page *__page_cache_alloc(gfp_t gfp)
 EXPORT_SYMBOL(__page_cache_alloc);
 #endif
-static int __sleep_on_page_lock(void *word)
-{
-        io_schedule();
-        return 0;
-}
 /*
 * In order to wait for pages to become available there must be
 * waitqueues associated with pages. By using a hash table of
@@ -512,7 +476,7 @@ void wait_on_page_bit(struct page *page, int bit_nr)
        DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
        if (test_bit(bit_nr, &page->flags))
-                __wait_on_bit(page_waitqueue(page), &wait, sync_page,
+                __wait_on_bit(page_waitqueue(page), &wait, sleep_on_page,
                                                        TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(wait_on_page_bit);
@@ -576,17 +540,12 @@ EXPORT_SYMBOL(end_page_writeback);
 /**
 * __lock_page - get a lock on the page, assuming we need to sleep to get it
 * @page: the page to lock
- *
- * Ugly. Running sync_page() in state TASK_UNINTERRUPTIBLE is scary.  If some
- * random driver's requestfn sets TASK_RUNNING, we could busywait.  However
- * chances are that on the second loop, the block layer's plug list is empty,
- * so sync_page() will then return in state TASK_UNINTERRUPTIBLE.
 */
 void __lock_page(struct page *page)
 {
        DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
-        __wait_on_bit_lock(page_waitqueue(page), &wait, sync_page,
+        __wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page,
                                                        TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__lock_page);
@@ -596,24 +555,10 @@ int __lock_page_killable(struct page *page)
        DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
        return __wait_on_bit_lock(page_waitqueue(page), &wait,
-                                        sync_page_killable, TASK_KILLABLE);
+                                        sleep_on_page_killable, TASK_KILLABLE);
 }
 EXPORT_SYMBOL_GPL(__lock_page_killable);
-/**
- * __lock_page_nosync - get a lock on the page, without calling sync_page()
- * @page: the page to lock
- *
- * Variant of lock_page that does not require the caller to hold a reference
- * on the page's mapping.
- */
-void __lock_page_nosync(struct page *page)
-{
-        DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
-        __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock,
-                                                        TASK_UNINTERRUPTIBLE);
-}
 int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
                         unsigned int flags)
 {
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 0207c2f6f8b..bfba796d374 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -945,7 +945,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
                collect_procs(ppage, &tokill);
        if (hpage != ppage)
-                lock_page_nosync(ppage);
+                lock_page(ppage);
        ret = try_to_unmap(ppage, ttu);
        if (ret != SWAP_SUCCESS)
@@ -1038,7 +1038,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
                         * Check "just unpoisoned", "filter hit", and
                         * "race with other subpage."
                         */
-                        lock_page_nosync(hpage);
+                        lock_page(hpage);
                        if (!PageHWPoison(hpage)
                            || (hwpoison_filter(p) && TestClearPageHWPoison(p))
                            || (p != hpage && TestSetPageHWPoison(hpage))) {
@@ -1088,7 +1088,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
         * It's very difficult to mess with pages currently under IO
         * and in many cases impossible, so we just avoid it here.
         */
-        lock_page_nosync(hpage);
+        lock_page(hpage);
        /*
         * unpoison always clear PG_hwpoison inside page lock
@@ -1231,7 +1231,7 @@ int unpoison_memory(unsigned long pfn)
                return 0;
        }
-        lock_page_nosync(page);
+        lock_page(page);
        /*
         * This test is racy because PG_hwpoison is set outside of page lock.
         * That's acceptable because that won't trigger kernel panic. Instead,
diff --git a/mm/nommu.c b/mm/nommu.c
index f59e1424d3d..fb6cbd6abe1 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1842,10 +1842,6 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 }
 EXPORT_SYMBOL(remap_vmalloc_range);
-void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
-}
 unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
        unsigned long len, unsigned long pgoff, unsigned long flags)
 {
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 2cb01f6ec5d..cc0ede169e4 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1239,7 +1239,7 @@ int set_page_dirty_lock(struct page *page)
 {
        int ret;
-        lock_page_nosync(page);
+        lock_page(page);
        ret = set_page_dirty(page);
        unlock_page(page);
        return ret;
diff --git a/mm/readahead.c b/mm/readahead.c
index 77506a291a2..cbddc3e1724 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -554,17 +554,5 @@ page_cache_async_readahead(struct address_space *mapping,
        /* do read-ahead */
        ondemand_readahead(mapping, ra, filp, true, offset, req_size);
-#ifdef CONFIG_BLOCK
-        /*
-         * Normally the current page is !uptodate and lock_page() will be
-         * immediately called to implicitly unplug the device. However this
-         * is not always true for RAID conifgurations, where data arrives
-         * not strictly in their submission order. In this case we need to
-         * explicitly kick off the IO.
-         */
-        if (PageUptodate(page))
-                blk_run_backing_dev(mapping->backing_dev_info, NULL);
-#endif
 }
 EXPORT_SYMBOL_GPL(page_cache_async_readahead);
diff --git a/mm/shmem.c b/mm/shmem.c
index 5ee67c99060..24d23f5bedf 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -224,7 +224,6 @@ static const struct vm_operations_struct shmem_vm_ops;
 static struct backing_dev_info shmem_backing_dev_info  __read_mostly = {
        .ra_pages       = 0,    /* No readahead */
        .capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
-        .unplug_io_fn   = default_unplug_io_fn,
 };
 static LIST_HEAD(shmem_swaplist);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 5c8cfabbc9b..46680461785 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -24,12 +24,10 @@
 /*
 * swapper_space is a fiction, retained to simplify the path through
- * vmscan's shrink_page_list, to make sync_page look nicer, and to allow
+ * vmscan's shrink_page_list.
- * future use of radix_tree tags in the swap cache.
 */
 static const struct address_space_operations swap_aops = {
        .writepage      = swap_writepage,
-        .sync_page      = block_sync_page,
        .set_page_dirty = __set_page_dirty_nobuffers,
        .migratepage    = migrate_page,
 };
@@ -37,7 +35,6 @@ static const struct address_space_operations swap_aops = {
 static struct backing_dev_info swap_backing_dev_info = {
        .name           = "swap",
        .capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
-        .unplug_io_fn   = swap_unplug_io_fn,
 };
 struct address_space swapper_space = {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 07a458d72fa..7ceea78ceb2 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -95,39 +95,6 @@ __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
 }
 /*
- * We need this because the bdev->unplug_fn can sleep and we cannot
- * hold swap_lock while calling the unplug_fn. And swap_lock
- * cannot be turned into a mutex.
- */
-static DECLARE_RWSEM(swap_unplug_sem);
-void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
-{
-        swp_entry_t entry;
-        down_read(&swap_unplug_sem);
-        entry.val = page_private(page);
-        if (PageSwapCache(page)) {
-                struct block_device *bdev = swap_info[swp_type(entry)]->bdev;
-                struct backing_dev_info *bdi;
-                /*
-                 * If the page is removed from swapcache from under us (with a
-                 * racy try_to_unuse/swapoff) we need an additional reference
-                 * count to avoid reading garbage from page_private(page) above.
-                 * If the WARN_ON triggers during a swapoff it maybe the race
-                 * condition and it's harmless. However if it triggers without
-                 * swapoff it signals a problem.
-                 */
-                WARN_ON(page_count(page) <= 1);
-                bdi = bdev->bd_inode->i_mapping->backing_dev_info;
-                blk_run_backing_dev(bdi, page);
-        }
-        up_read(&swap_unplug_sem);
-}
-/*
 * swapon tell device that all the old swap contents can be discarded,
 * to allow the swap device to optimize its wear-levelling.
 */
@@ -1643,10 +1610,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
                goto out_dput;
        }
-        /* wait for any unplug function to finish */
-        down_write(&swap_unplug_sem);
-        up_write(&swap_unplug_sem);
        destroy_swap_extents(p);
        if (p->flags & SWP_CONTINUED)
                free_swap_count_continuations(p);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 17497d0cd8b..251bed73ac0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -358,7 +358,7 @@ static int may_write_to_queue(struct backing_dev_info *bdi,
 static void handle_write_error(struct address_space *mapping,
                                struct page *page, int error)
 {
-        lock_page_nosync(page);
+        lock_page(page);
        if (page_mapping(page) == mapping)
                mapping_set_error(mapping, error);
        unlock_page(page);
author	Jens Axboe <jaxboe@fusionio.com>	2011-03-10 02:52:07 -0500
committer	Jens Axboe <jaxboe@fusionio.com>	2011-03-10 02:52:07 -0500
commit	7eaceaccab5f40bbfda044629a6298616aeaed50 (patch)
tree	33954d12f63e25a47eb6d86ef3d3d0a5e62bf752 /mm
parent	73c101011926c5832e6e141682180c4debe2cf45 (diff)