author     Linus Torvalds <torvalds@linux-foundation.org>  2011-03-24 13:16:26 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2011-03-24 13:16:26 -0400
commit     6c5103890057b1bb781b26b7aae38d33e4c517d8 (patch)
tree       e6e57961dcddcb5841acb34956e70b9dc696a880 /mm
parent     3dab04e6978e358ad2307bca563fabd6c5d2c58b (diff)
parent     9d2e157d970a73b3f270b631828e03eb452d525e (diff)
Merge branch 'for-2.6.39/core' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.39/core' of git://git.kernel.dk/linux-2.6-block: (65 commits)
Documentation/iostats.txt: bit-size reference etc.
cfq-iosched: removing unnecessary think time checking
cfq-iosched: Don't clear queue stats when preempt.
blk-throttle: Reset group slice when limits are changed
blk-cgroup: Only give unaccounted_time under debug
cfq-iosched: Don't set active queue in preempt
block: fix non-atomic access to genhd inflight structures
block: attempt to merge with existing requests on plug flush
block: NULL dereference on error path in __blkdev_get()
cfq-iosched: Don't update group weights when on service tree
fs: assign sb->s_bdi to default_backing_dev_info if the bdi is going away
block: Require subsystems to explicitly allocate bio_set integrity mempool
jbd2: finish conversion from WRITE_SYNC_PLUG to WRITE_SYNC and explicit plugging
jbd: finish conversion from WRITE_SYNC_PLUG to WRITE_SYNC and explicit plugging
fs: make fsync_buffers_list() plug
mm: make generic_writepages() use plugging
blk-cgroup: Add unaccounted time to timeslice_used.
block: fixup plugging stubs for !CONFIG_BLOCK
block: remove obsolete comments for blkdev_issue_zeroout.
blktrace: Use rq->cmd_flags directly in blk_add_trace_rq.
...
Fix up conflicts in fs/{aio.c,super.c}
Diffstat (limited to 'mm')
-rw-r--r--  mm/backing-dev.c     |   8
-rw-r--r--  mm/filemap.c         |  74
-rw-r--r--  mm/memory-failure.c  |   8
-rw-r--r--  mm/nommu.c           |   4
-rw-r--r--  mm/page-writeback.c  |  10
-rw-r--r--  mm/page_io.c         |   2
-rw-r--r--  mm/readahead.c       |  18
-rw-r--r--  mm/shmem.c           |   1
-rw-r--r--  mm/swap_state.c      |   5
-rw-r--r--  mm/swapfile.c        |  37
-rw-r--r--  mm/vmscan.c          |   2
11 files changed, 35 insertions, 134 deletions
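
The common thread of this series is the replacement of per-device unplugging (->unplug_io_fn, ->sync_page, REQ_UNPLUG) with explicit on-stack plugging, visible throughout the diffs below. A minimal sketch of the pattern the converted call sites follow; struct blk_plug, blk_start_plug() and blk_finish_plug() are the API this series converts callers to, while submit_my_bios() is a hypothetical stand-in for any bio-submitting loop:

#include <linux/blkdev.h>

/* hypothetical helper: any code path that issues one or more bios */
extern void submit_my_bios(void);

static void plugged_submit(void)
{
        struct blk_plug plug;

        blk_start_plug(&plug);  /* I/O issued from here queues on the task's plug */
        submit_my_bios();       /* adjacent requests can be merged while plugged */
        blk_finish_plug(&plug); /* flush the accumulated batch to the driver */
}

Sleeping inside the plugged section is safe: the scheduler flushes a task's plug list before it blocks, which is what lets the page-wait paths below shed their ->sync_page() device kicking.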
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 027100d30227..8fe9d3407921 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -14,17 +14,11 @@
 
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
 
-void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
-}
-EXPORT_SYMBOL(default_unplug_io_fn);
-
 struct backing_dev_info default_backing_dev_info = {
         .name           = "default",
         .ra_pages       = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE,
         .state          = 0,
         .capabilities   = BDI_CAP_MAP_COPY,
-        .unplug_io_fn   = default_unplug_io_fn,
 };
 EXPORT_SYMBOL_GPL(default_backing_dev_info);
 
@@ -604,7 +598,7 @@ static void bdi_prune_sb(struct backing_dev_info *bdi)
         spin_lock(&sb_lock);
         list_for_each_entry(sb, &super_blocks, s_list) {
                 if (sb->s_bdi == bdi)
-                        sb->s_bdi = NULL;
+                        sb->s_bdi = &default_backing_dev_info;
         }
         spin_unlock(&sb_lock);
 }
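
The bdi_prune_sb() change implements the "fs: assign sb->s_bdi to default_backing_dev_info if the bdi is going away" commit from the list above: superblocks of a disappearing device are pointed at the default bdi rather than NULL, so writeback paths can keep dereferencing sb->s_bdi unconditionally. An illustrative sketch, assuming a caller of this shape somewhere in the writeback path (bdi_write_congested() is an existing helper; the wrapper itself is hypothetical):

#include <linux/backing-dev.h>
#include <linux/fs.h>

/* hypothetical caller: no NULL check needed even during device teardown */
static int sb_is_write_congested(struct super_block *sb)
{
        return bdi_write_congested(sb->s_bdi);
}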
diff --git a/mm/filemap.c b/mm/filemap.c
index f807afda86f2..04d1992fd86b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -164,45 +164,15 @@ void delete_from_page_cache(struct page *page)
 }
 EXPORT_SYMBOL(delete_from_page_cache);
 
-static int sync_page(void *word)
+static int sleep_on_page(void *word)
 {
-        struct address_space *mapping;
-        struct page *page;
-
-        page = container_of((unsigned long *)word, struct page, flags);
-
-        /*
-         * page_mapping() is being called without PG_locked held.
-         * Some knowledge of the state and use of the page is used to
-         * reduce the requirements down to a memory barrier.
-         * The danger here is of a stale page_mapping() return value
-         * indicating a struct address_space different from the one it's
-         * associated with when it is associated with one.
-         * After smp_mb(), it's either the correct page_mapping() for
-         * the page, or an old page_mapping() and the page's own
-         * page_mapping() has gone NULL.
-         * The ->sync_page() address_space operation must tolerate
-         * page_mapping() going NULL. By an amazing coincidence,
-         * this comes about because none of the users of the page
-         * in the ->sync_page() methods make essential use of the
-         * page_mapping(), merely passing the page down to the backing
-         * device's unplug functions when it's non-NULL, which in turn
-         * ignore it for all cases but swap, where only page_private(page) is
-         * of interest. When page_mapping() does go NULL, the entire
-         * call stack gracefully ignores the page and returns.
-         * -- wli
-         */
-        smp_mb();
-        mapping = page_mapping(page);
-        if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
-                mapping->a_ops->sync_page(page);
         io_schedule();
         return 0;
 }
 
-static int sync_page_killable(void *word)
+static int sleep_on_page_killable(void *word)
 {
-        sync_page(word);
+        sleep_on_page(word);
         return fatal_signal_pending(current) ? -EINTR : 0;
 }
 
@@ -558,12 +528,6 @@ struct page *__page_cache_alloc(gfp_t gfp)
 EXPORT_SYMBOL(__page_cache_alloc);
 #endif
 
-static int __sleep_on_page_lock(void *word)
-{
-        io_schedule();
-        return 0;
-}
-
 /*
  * In order to wait for pages to become available there must be
  * waitqueues associated with pages. By using a hash table of
@@ -591,7 +555,7 @@ void wait_on_page_bit(struct page *page, int bit_nr)
         DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
 
         if (test_bit(bit_nr, &page->flags))
-                __wait_on_bit(page_waitqueue(page), &wait, sync_page,
+                __wait_on_bit(page_waitqueue(page), &wait, sleep_on_page,
                               TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(wait_on_page_bit);
@@ -655,17 +619,12 @@ EXPORT_SYMBOL(end_page_writeback);
 /**
  * __lock_page - get a lock on the page, assuming we need to sleep to get it
  * @page: the page to lock
- *
- * Ugly. Running sync_page() in state TASK_UNINTERRUPTIBLE is scary. If some
- * random driver's requestfn sets TASK_RUNNING, we could busywait. However
- * chances are that on the second loop, the block layer's plug list is empty,
- * so sync_page() will then return in state TASK_UNINTERRUPTIBLE.
  */
 void __lock_page(struct page *page)
 {
         DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
 
-        __wait_on_bit_lock(page_waitqueue(page), &wait, sync_page,
+        __wait_on_bit_lock(page_waitqueue(page), &wait, sleep_on_page,
                            TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(__lock_page);
@@ -675,24 +634,10 @@ int __lock_page_killable(struct page *page)
         DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
 
         return __wait_on_bit_lock(page_waitqueue(page), &wait,
-                                  sync_page_killable, TASK_KILLABLE);
+                                  sleep_on_page_killable, TASK_KILLABLE);
 }
 EXPORT_SYMBOL_GPL(__lock_page_killable);
 
-/**
- * __lock_page_nosync - get a lock on the page, without calling sync_page()
- * @page: the page to lock
- *
- * Variant of lock_page that does not require the caller to hold a reference
- * on the page's mapping.
- */
-void __lock_page_nosync(struct page *page)
-{
-        DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
-        __wait_on_bit_lock(page_waitqueue(page), &wait, __sleep_on_page_lock,
-                           TASK_UNINTERRUPTIBLE);
-}
-
 int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
                          unsigned int flags)
 {
@@ -1407,12 +1352,15 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
         unsigned long seg = 0;
         size_t count;
         loff_t *ppos = &iocb->ki_pos;
+        struct blk_plug plug;
 
         count = 0;
         retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
         if (retval)
                 return retval;
 
+        blk_start_plug(&plug);
+
         /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
         if (filp->f_flags & O_DIRECT) {
                 loff_t size;
@@ -1485,6 +1433,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
                 break;
         }
 out:
+        blk_finish_plug(&plug);
         return retval;
 }
 EXPORT_SYMBOL(generic_file_aio_read);
@@ -2596,11 +2545,13 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 {
         struct file *file = iocb->ki_filp;
         struct inode *inode = file->f_mapping->host;
+        struct blk_plug plug;
         ssize_t ret;
 
         BUG_ON(iocb->ki_pos != pos);
 
         mutex_lock(&inode->i_mutex);
+        blk_start_plug(&plug);
         ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
         mutex_unlock(&inode->i_mutex);
 
@@ -2611,6 +2562,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                 if (err < 0 && ret > 0)
                         ret = err;
         }
+        blk_finish_plug(&plug);
         return ret;
 }
 EXPORT_SYMBOL(generic_file_aio_write);
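
With ->sync_page() gone, waiting on a page flag reduces to a plain wait-bit sleep: the action callback handed to __wait_on_bit() only has to call io_schedule(), since pending I/O is now flushed when the plug owner calls blk_finish_plug() or blocks. A sketch of the same pattern, written as if it lived next to wait_on_page_bit() in mm/filemap.c (page_waitqueue() is local to that file); waiting on PG_writeback is just an illustrative choice of bit:

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/wait.h>

/* same shape as sleep_on_page() above: no device kicking, just sleep */
static int sleep_on_bit(void *word)
{
        io_schedule();
        return 0;       /* 0 = not interrupted, keep waiting until woken */
}

/* hypothetical example: wait for PG_writeback to clear on a page */
static void example_wait_on_writeback(struct page *page)
{
        DEFINE_WAIT_BIT(wait, &page->flags, PG_writeback);

        if (test_bit(PG_writeback, &page->flags))
                __wait_on_bit(page_waitqueue(page), &wait, sleep_on_bit,
                              TASK_UNINTERRUPTIBLE);
}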
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index e0af336530c6..37feb9fec228 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -945,7 +945,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
         collect_procs(ppage, &tokill);
 
         if (hpage != ppage)
-                lock_page_nosync(ppage);
+                lock_page(ppage);
 
         ret = try_to_unmap(ppage, ttu);
         if (ret != SWAP_SUCCESS)
@@ -1038,7 +1038,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
          * Check "just unpoisoned", "filter hit", and
          * "race with other subpage."
          */
-        lock_page_nosync(hpage);
+        lock_page(hpage);
         if (!PageHWPoison(hpage)
             || (hwpoison_filter(p) && TestClearPageHWPoison(p))
             || (p != hpage && TestSetPageHWPoison(hpage))) {
@@ -1088,7 +1088,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
          * It's very difficult to mess with pages currently under IO
          * and in many cases impossible, so we just avoid it here.
          */
-        lock_page_nosync(hpage);
+        lock_page(hpage);
 
         /*
          * unpoison always clear PG_hwpoison inside page lock
@@ -1231,7 +1231,7 @@ int unpoison_memory(unsigned long pfn)
                 return 0;
         }
 
-        lock_page_nosync(page);
+        lock_page(page);
         /*
          * This test is racy because PG_hwpoison is set outside of page lock.
          * That's acceptable because that won't trigger kernel panic. Instead,
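
Every lock_page_nosync() → lock_page() conversion here and in the files below follows from the filemap.c change: lock_page() no longer calls ->sync_page(), so it never touches page->mapping and is safe for callers that hold no reference on the mapping, which was the whole reason lock_page_nosync() existed. For reference, the shape of the lock_page() inline in this era (include/linux/pagemap.h):

static inline void lock_page(struct page *page)
{
        might_sleep();
        if (!trylock_page(page))
                __lock_page(page);
}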
diff --git a/mm/nommu.c b/mm/nommu.c
index e629143f9440..cb86e7d5e7f5 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1842,10 +1842,6 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 }
 EXPORT_SYMBOL(remap_vmalloc_range);
 
-void swap_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
-{
-}
-
 unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
         unsigned long len, unsigned long pgoff, unsigned long flags)
 {
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 632b46479c94..31f698862420 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1040,11 +1040,17 @@ static int __writepage(struct page *page, struct writeback_control *wbc,
 int generic_writepages(struct address_space *mapping,
                        struct writeback_control *wbc)
 {
+        struct blk_plug plug;
+        int ret;
+
         /* deal with chardevs and other special file */
         if (!mapping->a_ops->writepage)
                 return 0;
 
-        return write_cache_pages(mapping, wbc, __writepage, mapping);
+        blk_start_plug(&plug);
+        ret = write_cache_pages(mapping, wbc, __writepage, mapping);
+        blk_finish_plug(&plug);
+        return ret;
 }
 
 EXPORT_SYMBOL(generic_writepages);
@@ -1251,7 +1257,7 @@ int set_page_dirty_lock(struct page *page)
 {
         int ret;
 
-        lock_page_nosync(page);
+        lock_page(page);
         ret = set_page_dirty(page);
         unlock_page(page);
         return ret;
diff --git a/mm/page_io.c b/mm/page_io.c
index 2dee975bf469..dc76b4d0611e 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -106,7 +106,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
                 goto out;
         }
         if (wbc->sync_mode == WB_SYNC_ALL)
-                rw |= REQ_SYNC | REQ_UNPLUG;
+                rw |= REQ_SYNC;
         count_vm_event(PSWPOUT);
         set_page_writeback(page);
         unlock_page(page);
diff --git a/mm/readahead.c b/mm/readahead.c
index 77506a291a2d..2c0cc489e288 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -109,9 +109,12 @@ EXPORT_SYMBOL(read_cache_pages);
 static int read_pages(struct address_space *mapping, struct file *filp,
                 struct list_head *pages, unsigned nr_pages)
 {
+        struct blk_plug plug;
         unsigned page_idx;
         int ret;
 
+        blk_start_plug(&plug);
+
         if (mapping->a_ops->readpages) {
                 ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
                 /* Clean up the remaining pages */
@@ -129,7 +132,10 @@ static int read_pages(struct address_space *mapping, struct file *filp,
                 page_cache_release(page);
         }
         ret = 0;
+
 out:
+        blk_finish_plug(&plug);
+
         return ret;
 }
 
@@ -554,17 +560,5 @@ page_cache_async_readahead(struct address_space *mapping,
 
         /* do read-ahead */
         ondemand_readahead(mapping, ra, filp, true, offset, req_size);
-
-#ifdef CONFIG_BLOCK
-        /*
-         * Normally the current page is !uptodate and lock_page() will be
-         * immediately called to implicitly unplug the device. However this
-         * is not always true for RAID configurations, where data arrives
-         * not strictly in their submission order. In this case we need to
-         * explicitly kick off the IO.
-         */
-        if (PageUptodate(page))
-                blk_run_backing_dev(mapping->backing_dev_info, NULL);
-#endif
 }
 EXPORT_SYMBOL_GPL(page_cache_async_readahead);
diff --git a/mm/shmem.c b/mm/shmem.c
index 91ce9a1024d7..58da7c150ba6 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -224,7 +224,6 @@ static const struct vm_operations_struct shmem_vm_ops;
 static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
         .ra_pages       = 0,    /* No readahead */
         .capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
-        .unplug_io_fn   = default_unplug_io_fn,
 };
 
 static LIST_HEAD(shmem_swaplist);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 5c8cfabbc9bc..46680461785b 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -24,12 +24,10 @@
 
 /*
  * swapper_space is a fiction, retained to simplify the path through
- * vmscan's shrink_page_list, to make sync_page look nicer, and to allow
- * future use of radix_tree tags in the swap cache.
+ * vmscan's shrink_page_list.
  */
 static const struct address_space_operations swap_aops = {
         .writepage      = swap_writepage,
-        .sync_page      = block_sync_page,
         .set_page_dirty = __set_page_dirty_nobuffers,
         .migratepage    = migrate_page,
 };
@@ -37,7 +35,6 @@ static const struct address_space_operations swap_aops = {
 static struct backing_dev_info swap_backing_dev_info = {
         .name           = "swap",
         .capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
-        .unplug_io_fn   = swap_unplug_io_fn,
 };
 
 struct address_space swapper_space = {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 039e61677635..8c6b3ce38f09 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -95,39 +95,6 @@ __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
 }
 
 /*
- * We need this because the bdev->unplug_fn can sleep and we cannot
- * hold swap_lock while calling the unplug_fn. And swap_lock
- * cannot be turned into a mutex.
- */
-static DECLARE_RWSEM(swap_unplug_sem);
-
-void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page)
-{
-        swp_entry_t entry;
-
-        down_read(&swap_unplug_sem);
-        entry.val = page_private(page);
-        if (PageSwapCache(page)) {
-                struct block_device *bdev = swap_info[swp_type(entry)]->bdev;
-                struct backing_dev_info *bdi;
-
-                /*
-                 * If the page is removed from swapcache from under us (with a
-                 * racy try_to_unuse/swapoff) we need an additional reference
-                 * count to avoid reading garbage from page_private(page) above.
-                 * If the WARN_ON triggers during a swapoff it maybe the race
-                 * condition and it's harmless. However if it triggers without
-                 * swapoff it signals a problem.
-                 */
-                WARN_ON(page_count(page) <= 1);
-
-                bdi = bdev->bd_inode->i_mapping->backing_dev_info;
-                blk_run_backing_dev(bdi, page);
-        }
-        up_read(&swap_unplug_sem);
-}
-
-/*
  * swapon tell device that all the old swap contents can be discarded,
  * to allow the swap device to optimize its wear-levelling.
  */
@@ -1662,10 +1629,6 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
                 goto out_dput;
         }
 
-        /* wait for any unplug function to finish */
-        down_write(&swap_unplug_sem);
-        up_write(&swap_unplug_sem);
-
         destroy_swap_extents(p);
         if (p->flags & SWP_CONTINUED)
                 free_swap_count_continuations(p);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 060e4c191403..f73b8657c2d0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -358,7 +358,7 @@ static int may_write_to_queue(struct backing_dev_info *bdi,
 static void handle_write_error(struct address_space *mapping,
                                 struct page *page, int error)
 {
-        lock_page_nosync(page);
+        lock_page(page);
         if (page_mapping(page) == mapping)
                 mapping_set_error(mapping, error);
         unlock_page(page);