Merge branch 'for-3.10/core' of git://git.kernel.dk/linux-block

Pull block core updates from Jens Axboe: - Major bit is Kent's prep work for immutable bio vecs. - Stable candidate fix for a scheduling-while-atomic in the queue bypass operation. - Fix for the hang on exceeded rq->datalen 32-bit unsigned when merging discard bios. - Tejun's changes to convert the writeback thread pool to the generic workqueue mechanism. - Runtime PM framework, SCSI patches exist on top of these in James' tree. - A few random fixes. * 'for-3.10/core' of git://git.kernel.dk/linux-block: (40 commits) relay: move remove_buf_file inside relay_close_buf partitions/efi.c: replace useless kzalloc's by kmalloc's fs/block_dev.c: fix iov_shorten() criteria in blkdev_aio_read() block: fix max discard sectors limit blkcg: fix "scheduling while atomic" in blk_queue_bypass_start Documentation: cfq-iosched: update documentation help for cfq tunables writeback: expose the bdi_wq workqueue writeback: replace custom worker pool implementation with unbound workqueue writeback: remove unused bdi_pending_list aoe: Fix unitialized var usage bio-integrity: Add explicit field for owner of bip_buf block: Add an explicit bio flag for bios that own their bvec block: Add bio_alloc_pages() block: Convert some code to bio_for_each_segment_all() block: Add bio_for_each_segment_all() bounce: Refactor __blk_queue_bounce to not use bi_io_vec raid1: use bio_copy_data() pktcdvd: Use bio_reset() in disabled code to kill bi_idx usage pktcdvd: use bio_copy_data() block: Add bio_copy_data() ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2013-05-08 13:13:35 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2013-05-08 13:13:35 -0400
commit: 4de13d7aa8f4d02f4dc99d4609575659f92b3c5a (patch)
tree: 3bc9729eabe79c6164cd29a5d605000bc82bf837 /mm
parent: 5af43c24ca59a448c9312dd4a4a51d27ec3b9a73 (diff)
parent: b8d4a5bf6a049303a29a3275f463f09a490b50ea (diff)
3 files changed, 49 insertions, 286 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 41733c5dc820..502517492258 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -31,13 +31,14 @@ EXPORT_SYMBOL_GPL(noop_backing_dev_info);
 static struct class *bdi_class;
 /*
- * bdi_lock protects updates to bdi_list and bdi_pending_list, as well as
+ * bdi_lock protects updates to bdi_list. bdi_list has RCU reader side
- * reader side protection for bdi_pending_list. bdi_list has RCU reader side
 * locking.
 */
 DEFINE_SPINLOCK(bdi_lock);
 LIST_HEAD(bdi_list);
-LIST_HEAD(bdi_pending_list);
+/* bdi_wq serves all asynchronous writeback tasks */
+struct workqueue_struct *bdi_wq;
 void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
 {
@@ -257,6 +258,11 @@ static int __init default_bdi_init(void)
 {
        int err;
+        bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE |
+                                              WQ_UNBOUND | WQ_SYSFS, 0);
+        if (!bdi_wq)
+                return -ENOMEM;
        err = bdi_init(&default_backing_dev_info);
        if (!err)
                bdi_register(&default_backing_dev_info, NULL, "default");
@@ -271,26 +277,6 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
        return wb_has_dirty_io(&bdi->wb);
 }
-static void wakeup_timer_fn(unsigned long data)
-{
-        struct backing_dev_info *bdi = (struct backing_dev_info *)data;
-        spin_lock_bh(&bdi->wb_lock);
-        if (bdi->wb.task) {
-                trace_writeback_wake_thread(bdi);
-                wake_up_process(bdi->wb.task);
-        } else if (bdi->dev) {
-                /*
-                 * When bdi tasks are inactive for long time, they are killed.
-                 * In this case we have to wake-up the forker thread which
-                 * should create and run the bdi thread.
-                 */
-                trace_writeback_wake_forker_thread(bdi);
-                wake_up_process(default_backing_dev_info.wb.task);
-        }
-        spin_unlock_bh(&bdi->wb_lock);
-}
 /*
 * This function is used when the first inode for this bdi is marked dirty. It
 * wakes-up the corresponding bdi thread which should then take care of the
@@ -307,176 +293,7 @@ void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)
        unsigned long timeout;
        timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
-        mod_timer(&bdi->wb.wakeup_timer, jiffies + timeout);
+        mod_delayed_work(bdi_wq, &bdi->wb.dwork, timeout);
-}
-/*
- * Calculate the longest interval (jiffies) bdi threads are allowed to be
- * inactive.
- */
-static unsigned long bdi_longest_inactive(void)
-{
-        unsigned long interval;
-        interval = msecs_to_jiffies(dirty_writeback_interval * 10);
-        return max(5UL * 60 * HZ, interval);
-}
-/*
- * Clear pending bit and wakeup anybody waiting for flusher thread creation or
- * shutdown
- */
-static void bdi_clear_pending(struct backing_dev_info *bdi)
-{
-        clear_bit(BDI_pending, &bdi->state);
-        smp_mb__after_clear_bit();
-        wake_up_bit(&bdi->state, BDI_pending);
-}
-static int bdi_forker_thread(void *ptr)
-{
-        struct bdi_writeback *me = ptr;
-        current->flags |= PF_SWAPWRITE;
-        set_freezable();
-        /*
-         * Our parent may run at a different priority, just set us to normal
-         */
-        set_user_nice(current, 0);
-        for (;;) {
-                struct task_struct *task = NULL;
-                struct backing_dev_info *bdi;
-                enum {
-                        NO_ACTION,   /* Nothing to do */
-                        FORK_THREAD, /* Fork bdi thread */
-                        KILL_THREAD, /* Kill inactive bdi thread */
-                } action = NO_ACTION;
-                /*
-                 * Temporary measure, we want to make sure we don't see
-                 * dirty data on the default backing_dev_info
-                 */
-                if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) {
-                        del_timer(&me->wakeup_timer);
-                        wb_do_writeback(me, 0);
-                }
-                spin_lock_bh(&bdi_lock);
-                /*
-                 * In the following loop we are going to check whether we have
-                 * some work to do without any synchronization with tasks
-                 * waking us up to do work for them. Set the task state here
-                 * so that we don't miss wakeups after verifying conditions.
-                 */
-                set_current_state(TASK_INTERRUPTIBLE);
-                list_for_each_entry(bdi, &bdi_list, bdi_list) {
-                        bool have_dirty_io;
-                        if (!bdi_cap_writeback_dirty(bdi) ||
-                             bdi_cap_flush_forker(bdi))
-                                continue;
-                        WARN(!test_bit(BDI_registered, &bdi->state),
-                             "bdi %p/%s is not registered!\n", bdi, bdi->name);
-                        have_dirty_io = !list_empty(&bdi->work_list) ||
-                                        wb_has_dirty_io(&bdi->wb);
-                        /*
-                         * If the bdi has work to do, but the thread does not
-                         * exist - create it.
-                         */
-                        if (!bdi->wb.task && have_dirty_io) {
-                                /*
-                                 * Set the pending bit - if someone will try to
-                                 * unregister this bdi - it'll wait on this bit.
-                                 */
-                                set_bit(BDI_pending, &bdi->state);
-                                action = FORK_THREAD;
-                                break;
-                        }
-                        spin_lock(&bdi->wb_lock);
-                        /*
-                         * If there is no work to do and the bdi thread was
-                         * inactive long enough - kill it. The wb_lock is taken
-                         * to make sure no-one adds more work to this bdi and
-                         * wakes the bdi thread up.
-                         */
-                        if (bdi->wb.task && !have_dirty_io &&
-                            time_after(jiffies, bdi->wb.last_active +
-                                                bdi_longest_inactive())) {
-                                task = bdi->wb.task;
-                                bdi->wb.task = NULL;
-                                spin_unlock(&bdi->wb_lock);
-                                set_bit(BDI_pending, &bdi->state);
-                                action = KILL_THREAD;
-                                break;
-                        }
-                        spin_unlock(&bdi->wb_lock);
-                }
-                spin_unlock_bh(&bdi_lock);
-                /* Keep working if default bdi still has things to do */
-                if (!list_empty(&me->bdi->work_list))
-                        __set_current_state(TASK_RUNNING);
-                switch (action) {
-                case FORK_THREAD:
-                        __set_current_state(TASK_RUNNING);
-                        task = kthread_create(bdi_writeback_thread, &bdi->wb,
-                                              "flush-%s", dev_name(bdi->dev));
-                        if (IS_ERR(task)) {
-                                /*
-                                 * If thread creation fails, force writeout of
-                                 * the bdi from the thread. Hopefully 1024 is
-                                 * large enough for efficient IO.
-                                 */
-                                writeback_inodes_wb(&bdi->wb, 1024,
-                                                    WB_REASON_FORKER_THREAD);
-                        } else {
-                                /*
-                                 * The spinlock makes sure we do not lose
-                                 * wake-ups when racing with 'bdi_queue_work()'.
-                                 * And as soon as the bdi thread is visible, we
-                                 * can start it.
-                                 */
-                                spin_lock_bh(&bdi->wb_lock);
-                                bdi->wb.task = task;
-                                spin_unlock_bh(&bdi->wb_lock);
-                                wake_up_process(task);
-                        }
-                        bdi_clear_pending(bdi);
-                        break;
-                case KILL_THREAD:
-                        __set_current_state(TASK_RUNNING);
-                        kthread_stop(task);
-                        bdi_clear_pending(bdi);
-                        break;
-                case NO_ACTION:
-                        if (!wb_has_dirty_io(me) || !dirty_writeback_interval)
-                                /*
-                                 * There are no dirty data. The only thing we
-                                 * should now care about is checking for
-                                 * inactive bdi threads and killing them. Thus,
-                                 * let's sleep for longer time, save energy and
-                                 * be friendly for battery-driven devices.
-                                 */
-                                schedule_timeout(bdi_longest_inactive());
-                        else
-                                schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
-                        try_to_freeze();
-                        break;
-                }
-        }
-        return 0;
 }
 /*
@@ -489,6 +306,9 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi)
        spin_unlock_bh(&bdi_lock);
        synchronize_rcu_expedited();
+        /* bdi_list is now unused, clear it to mark @bdi dying */
+        INIT_LIST_HEAD(&bdi->bdi_list);
 }
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
@@ -508,20 +328,6 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
        bdi->dev = dev;
-        /*
-         * Just start the forker thread for our default backing_dev_info,
-         * and add other bdi's to the list. They will get a thread created
-         * on-demand when they need it.
-         */
-        if (bdi_cap_flush_forker(bdi)) {
-                struct bdi_writeback *wb = &bdi->wb;
-                wb->task = kthread_run(bdi_forker_thread, wb, "bdi-%s",
-                                                dev_name(dev));
-                if (IS_ERR(wb->task))
-                        return PTR_ERR(wb->task);
-        }
        bdi_debug_register(bdi, dev_name(dev));
        set_bit(BDI_registered, &bdi->state);
@@ -545,8 +351,6 @@ EXPORT_SYMBOL(bdi_register_dev);
 */
 static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 {
-        struct task_struct *task;
        if (!bdi_cap_writeback_dirty(bdi))
                return;
@@ -556,22 +360,20 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
        bdi_remove_from_list(bdi);
        /*
-         * If setup is pending, wait for that to complete first
+         * Drain work list and shutdown the delayed_work.  At this point,
+         * @bdi->bdi_list is empty telling bdi_writeback_workfn() that @bdi
+         * is dying and its work_list needs to be drained no matter what.
         */
-        wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
+        mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
-                        TASK_UNINTERRUPTIBLE);
+        flush_delayed_work(&bdi->wb.dwork);
+        WARN_ON(!list_empty(&bdi->work_list));
        /*
-         * Finally, kill the kernel thread. We don't need to be RCU
+         * This shouldn't be necessary unless @bdi for some reason has
-         * safe anymore, since the bdi is gone from visibility.
+         * unflushed dirty IO after work_list is drained.  Do it anyway
+         * just in case.
         */
-        spin_lock_bh(&bdi->wb_lock);
+        cancel_delayed_work_sync(&bdi->wb.dwork);
-        task = bdi->wb.task;
-        bdi->wb.task = NULL;
-        spin_unlock_bh(&bdi->wb_lock);
-        if (task)
-                kthread_stop(task);
 }
 /*
@@ -597,10 +399,8 @@ void bdi_unregister(struct backing_dev_info *bdi)
                bdi_set_min_ratio(bdi, 0);
                trace_writeback_bdi_unregister(bdi);
                bdi_prune_sb(bdi);
-                del_timer_sync(&bdi->wb.wakeup_timer);
-                if (!bdi_cap_flush_forker(bdi))
+                bdi_wb_shutdown(bdi);
-                        bdi_wb_shutdown(bdi);
                bdi_debug_unregister(bdi);
                spin_lock_bh(&bdi->wb_lock);
@@ -622,7 +422,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
        INIT_LIST_HEAD(&wb->b_io);
        INIT_LIST_HEAD(&wb->b_more_io);
        spin_lock_init(&wb->list_lock);
-        setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi);
+        INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn);
 }
 /*
@@ -695,12 +495,11 @@ void bdi_destroy(struct backing_dev_info *bdi)
        bdi_unregister(bdi);
        /*
-         * If bdi_unregister() had already been called earlier, the
+         * If bdi_unregister() had already been called earlier, the dwork
-         * wakeup_timer could still be armed because bdi_prune_sb()
+         * could still be pending because bdi_prune_sb() can race with the
-         * can race with the bdi_wakeup_thread_delayed() calls from
+         * bdi_wakeup_thread_delayed() calls from __mark_inode_dirty().
-         * __mark_inode_dirty().
         */
-        del_timer_sync(&bdi->wb.wakeup_timer);
+        cancel_delayed_work_sync(&bdi->wb.dwork);
        for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
                percpu_counter_destroy(&bdi->bdi_stat[i]);
diff --git a/mm/bounce.c b/mm/bounce.c
index a5c2ec3589cb..c9f0a4339a7d 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -101,7 +101,7 @@ static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
        struct bio_vec *tovec, *fromvec;
        int i;
-        __bio_for_each_segment(tovec, to, i, 0) {
+        bio_for_each_segment(tovec, to, i) {
                fromvec = from->bi_io_vec + i;
                /*
@@ -134,7 +134,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
        /*
         * free up bounce indirect pages used
         */
-        __bio_for_each_segment(bvec, bio, i, 0) {
+        bio_for_each_segment_all(bvec, bio, i) {
                org_vec = bio_orig->bi_io_vec + i;
                if (bvec->bv_page == org_vec->bv_page)
                        continue;
@@ -199,78 +199,43 @@ static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio)
 static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
                               mempool_t *pool, int force)
 {
-        struct page *page;
+        struct bio *bio;
-        struct bio *bio = NULL;
+        int rw = bio_data_dir(*bio_orig);
-        int i, rw = bio_data_dir(*bio_orig);
        struct bio_vec *to, *from;
+        unsigned i;
-        bio_for_each_segment(from, *bio_orig, i) {
+        bio_for_each_segment(from, *bio_orig, i)
-                page = from->bv_page;
+                if (page_to_pfn(from->bv_page) > queue_bounce_pfn(q))
+                        goto bounce;
-                /*
+        return;
-                 * is destination page below bounce pfn?
+bounce:
-                 */
+        bio = bio_clone_bioset(*bio_orig, GFP_NOIO, fs_bio_set);
-                if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force)
-                        continue;
-                /*
-                 * irk, bounce it
-                 */
-                if (!bio) {
-                        unsigned int cnt = (*bio_orig)->bi_vcnt;
-                        bio = bio_alloc(GFP_NOIO, cnt);
+        bio_for_each_segment_all(to, bio, i) {
-                        memset(bio->bi_io_vec, 0, cnt * sizeof(struct bio_vec));
+                struct page *page = to->bv_page;
-                }
-                        
-                to = bio->bi_io_vec + i;
+                if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force)
+                        continue;
-                to->bv_page = mempool_alloc(pool, q->bounce_gfp);
-                to->bv_len = from->bv_len;
-                to->bv_offset = from->bv_offset;
                inc_zone_page_state(to->bv_page, NR_BOUNCE);
+                to->bv_page = mempool_alloc(pool, q->bounce_gfp);
                if (rw == WRITE) {
                        char *vto, *vfrom;
-                        flush_dcache_page(from->bv_page);
+                        flush_dcache_page(page);
                        vto = page_address(to->bv_page) + to->bv_offset;
-                        vfrom = kmap(from->bv_page) + from->bv_offset;
+                        vfrom = kmap_atomic(page) + to->bv_offset;
                        memcpy(vto, vfrom, to->bv_len);
-                        kunmap(from->bv_page);
+                        kunmap_atomic(vfrom);
                }
        }
-        /*
-         * no pages bounced
-         */
-        if (!bio)
-                return;
        trace_block_bio_bounce(q, *bio_orig);
-        /*
-         * at least one page was bounced, fill in possible non-highmem
-         * pages
-         */
-        __bio_for_each_segment(from, *bio_orig, i, 0) {
-                to = bio_iovec_idx(bio, i);
-                if (!to->bv_page) {
-                        to->bv_page = from->bv_page;
-                        to->bv_len = from->bv_len;
-                        to->bv_offset = from->bv_offset;
-                }
-        }
-        bio->bi_bdev = (*bio_orig)->bi_bdev;
        bio->bi_flags |= (1 << BIO_BOUNCED);
-        bio->bi_sector = (*bio_orig)->bi_sector;
-        bio->bi_rw = (*bio_orig)->bi_rw;
-        bio->bi_vcnt = (*bio_orig)->bi_vcnt;
-        bio->bi_idx = (*bio_orig)->bi_idx;
-        bio->bi_size = (*bio_orig)->bi_size;
        if (pool == page_pool) {
                bio->bi_end_io = bounce_end_io_write;
diff --git a/mm/page_io.c b/mm/page_io.c
index 06a8842a6ec6..a8a3ef45fed7 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -36,7 +36,6 @@ static struct bio *get_swap_bio(gfp_t gfp_flags,
                bio->bi_io_vec[0].bv_len = PAGE_SIZE;
                bio->bi_io_vec[0].bv_offset = 0;
                bio->bi_vcnt = 1;
-                bio->bi_idx = 0;
                bio->bi_size = PAGE_SIZE;
                bio->bi_end_io = end_io;
        }
author	Linus Torvalds <torvalds@linux-foundation.org>	2013-05-08 13:13:35 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-05-08 13:13:35 -0400
commit	4de13d7aa8f4d02f4dc99d4609575659f92b3c5a (patch)
tree	3bc9729eabe79c6164cd29a5d605000bc82bf837 /mm
parent	5af43c24ca59a448c9312dd4a4a51d27ec3b9a73 (diff)
parent	b8d4a5bf6a049303a29a3275f463f09a490b50ea (diff)

diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 41733c5dc820..502517492258 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c
@@ -31,13 +31,14 @@ EXPORT_SYMBOL_GPL(noop_backing_dev_info);
31	static struct class *bdi_class;	31	static struct class *bdi_class;
32		32
33	/*	33	/*
34	* bdi_lock protects updates to bdi_list and bdi_pending_list, as well as	34	* bdi_lock protects updates to bdi_list. bdi_list has RCU reader side
35	* reader side protection for bdi_pending_list. bdi_list has RCU reader side
36	* locking.	35	* locking.
37	*/	36	*/
38	DEFINE_SPINLOCK(bdi_lock);	37	DEFINE_SPINLOCK(bdi_lock);
39	LIST_HEAD(bdi_list);	38	LIST_HEAD(bdi_list);
40	LIST_HEAD(bdi_pending_list);	39
		40	/* bdi_wq serves all asynchronous writeback tasks */
		41	struct workqueue_struct *bdi_wq;
41		42
42	void bdi_lock_two(struct bdi_writeback wb1, struct bdi_writeback wb2)	43	void bdi_lock_two(struct bdi_writeback wb1, struct bdi_writeback wb2)
43	{	44	{
@@ -257,6 +258,11 @@ static int __init default_bdi_init(void)
257	{	258	{
258	int err;	259	int err;
259		260
		261	bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM \| WQ_FREEZABLE \|
		262	WQ_UNBOUND \| WQ_SYSFS, 0);
		263	if (!bdi_wq)
		264	return -ENOMEM;
		265
260	err = bdi_init(&default_backing_dev_info);	266	err = bdi_init(&default_backing_dev_info);
261	if (!err)	267	if (!err)
262	bdi_register(&default_backing_dev_info, NULL, "default");	268	bdi_register(&default_backing_dev_info, NULL, "default");
@@ -271,26 +277,6 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
271	return wb_has_dirty_io(&bdi->wb);	277	return wb_has_dirty_io(&bdi->wb);
272	}	278	}
273		279
274	static void wakeup_timer_fn(unsigned long data)
275	{
276	struct backing_dev_info bdi = (struct backing_dev_info )data;
277
278	spin_lock_bh(&bdi->wb_lock);
279	if (bdi->wb.task) {
280	trace_writeback_wake_thread(bdi);
281	wake_up_process(bdi->wb.task);
282	} else if (bdi->dev) {
283	/*
284	* When bdi tasks are inactive for long time, they are killed.
285	* In this case we have to wake-up the forker thread which
286	* should create and run the bdi thread.
287	*/
288	trace_writeback_wake_forker_thread(bdi);
289	wake_up_process(default_backing_dev_info.wb.task);
290	}
291	spin_unlock_bh(&bdi->wb_lock);
292	}
293
294	/*	280	/*
295	* This function is used when the first inode for this bdi is marked dirty. It	281	* This function is used when the first inode for this bdi is marked dirty. It
296	* wakes-up the corresponding bdi thread which should then take care of the	282	* wakes-up the corresponding bdi thread which should then take care of the
@@ -307,176 +293,7 @@ void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)
307	unsigned long timeout;	293	unsigned long timeout;
308		294
309	timeout = msecs_to_jiffies(dirty_writeback_interval * 10);	295	timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
310	mod_timer(&bdi->wb.wakeup_timer, jiffies + timeout);	296	mod_delayed_work(bdi_wq, &bdi->wb.dwork, timeout);
311	}
312
313	/*
314	* Calculate the longest interval (jiffies) bdi threads are allowed to be
315	* inactive.
316	*/
317	static unsigned long bdi_longest_inactive(void)
318	{
319	unsigned long interval;
320
321	interval = msecs_to_jiffies(dirty_writeback_interval * 10);
322	return max(5UL * 60 * HZ, interval);
323	}
324
325	/*
326	* Clear pending bit and wakeup anybody waiting for flusher thread creation or
327	* shutdown
328	*/
329	static void bdi_clear_pending(struct backing_dev_info *bdi)
330	{
331	clear_bit(BDI_pending, &bdi->state);
332	smp_mb__after_clear_bit();
333	wake_up_bit(&bdi->state, BDI_pending);
334	}
335
336	static int bdi_forker_thread(void *ptr)
337	{
338	struct bdi_writeback *me = ptr;
339
340	current->flags \|= PF_SWAPWRITE;
341	set_freezable();
342
343	/*
344	* Our parent may run at a different priority, just set us to normal
345	*/
346	set_user_nice(current, 0);
347
348	for (;;) {
349	struct task_struct *task = NULL;
350	struct backing_dev_info *bdi;
351	enum {
352	NO_ACTION, /* Nothing to do */
353	FORK_THREAD, /* Fork bdi thread */
354	KILL_THREAD, /* Kill inactive bdi thread */
355	} action = NO_ACTION;
356
357	/*
358	* Temporary measure, we want to make sure we don't see
359	* dirty data on the default backing_dev_info
360	*/
361	if (wb_has_dirty_io(me) \|\| !list_empty(&me->bdi->work_list)) {
362	del_timer(&me->wakeup_timer);
363	wb_do_writeback(me, 0);
364	}
365
366	spin_lock_bh(&bdi_lock);
367	/*
368	* In the following loop we are going to check whether we have
369	* some work to do without any synchronization with tasks
370	* waking us up to do work for them. Set the task state here
371	* so that we don't miss wakeups after verifying conditions.
372	*/
373	set_current_state(TASK_INTERRUPTIBLE);
374
375	list_for_each_entry(bdi, &bdi_list, bdi_list) {
376	bool have_dirty_io;
377
378	if (!bdi_cap_writeback_dirty(bdi) \|\|
379	bdi_cap_flush_forker(bdi))
380	continue;
381
382	WARN(!test_bit(BDI_registered, &bdi->state),
383	"bdi %p/%s is not registered!\n", bdi, bdi->name);
384
385	have_dirty_io = !list_empty(&bdi->work_list) \|\|
386	wb_has_dirty_io(&bdi->wb);
387
388	/*
389	* If the bdi has work to do, but the thread does not
390	* exist - create it.
391	*/
392	if (!bdi->wb.task && have_dirty_io) {
393	/*
394	* Set the pending bit - if someone will try to
395	* unregister this bdi - it'll wait on this bit.
396	*/
397	set_bit(BDI_pending, &bdi->state);
398	action = FORK_THREAD;
399	break;
400	}
401
402	spin_lock(&bdi->wb_lock);
403
404	/*
405	* If there is no work to do and the bdi thread was
406	* inactive long enough - kill it. The wb_lock is taken
407	* to make sure no-one adds more work to this bdi and
408	* wakes the bdi thread up.
409	*/
410	if (bdi->wb.task && !have_dirty_io &&
411	time_after(jiffies, bdi->wb.last_active +
412	bdi_longest_inactive())) {
413	task = bdi->wb.task;
414	bdi->wb.task = NULL;
415	spin_unlock(&bdi->wb_lock);
416	set_bit(BDI_pending, &bdi->state);
417	action = KILL_THREAD;
418	break;
419	}
420	spin_unlock(&bdi->wb_lock);
421	}
422	spin_unlock_bh(&bdi_lock);
423
424	/* Keep working if default bdi still has things to do */
425	if (!list_empty(&me->bdi->work_list))
426	__set_current_state(TASK_RUNNING);
427
428	switch (action) {
429	case FORK_THREAD:
430	__set_current_state(TASK_RUNNING);
431	task = kthread_create(bdi_writeback_thread, &bdi->wb,
432	"flush-%s", dev_name(bdi->dev));
433	if (IS_ERR(task)) {
434	/*
435	* If thread creation fails, force writeout of
436	* the bdi from the thread. Hopefully 1024 is
437	* large enough for efficient IO.
438	*/
439	writeback_inodes_wb(&bdi->wb, 1024,
440	WB_REASON_FORKER_THREAD);
441	} else {
442	/*
443	* The spinlock makes sure we do not lose
444	* wake-ups when racing with 'bdi_queue_work()'.
445	* And as soon as the bdi thread is visible, we
446	* can start it.
447	*/
448	spin_lock_bh(&bdi->wb_lock);
449	bdi->wb.task = task;
450	spin_unlock_bh(&bdi->wb_lock);
451	wake_up_process(task);
452	}
453	bdi_clear_pending(bdi);
454	break;
455
456	case KILL_THREAD:
457	__set_current_state(TASK_RUNNING);
458	kthread_stop(task);
459	bdi_clear_pending(bdi);
460	break;
461
462	case NO_ACTION:
463	if (!wb_has_dirty_io(me) \|\| !dirty_writeback_interval)
464	/*
465	* There are no dirty data. The only thing we
466	* should now care about is checking for
467	* inactive bdi threads and killing them. Thus,
468	* let's sleep for longer time, save energy and
469	* be friendly for battery-driven devices.
470	*/
471	schedule_timeout(bdi_longest_inactive());
472	else
473	schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
474	try_to_freeze();
475	break;
476	}
477	}
478
479	return 0;
480	}	297	}
481		298
482	/*	299	/*
@@ -489,6 +306,9 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi)
489	spin_unlock_bh(&bdi_lock);	306	spin_unlock_bh(&bdi_lock);
490		307
491	synchronize_rcu_expedited();	308	synchronize_rcu_expedited();
		309
		310	/* bdi_list is now unused, clear it to mark @bdi dying */
		311	INIT_LIST_HEAD(&bdi->bdi_list);
492	}	312	}
493		313
494	int bdi_register(struct backing_dev_info bdi, struct device parent,	314	int bdi_register(struct backing_dev_info bdi, struct device parent,
@@ -508,20 +328,6 @@ int bdi_register(struct backing_dev_info bdi, struct device parent,
508		328
509	bdi->dev = dev;	329	bdi->dev = dev;
510		330
511	/*
512	* Just start the forker thread for our default backing_dev_info,
513	* and add other bdi's to the list. They will get a thread created
514	* on-demand when they need it.
515	*/
516	if (bdi_cap_flush_forker(bdi)) {
517	struct bdi_writeback *wb = &bdi->wb;
518
519	wb->task = kthread_run(bdi_forker_thread, wb, "bdi-%s",
520	dev_name(dev));
521	if (IS_ERR(wb->task))
522	return PTR_ERR(wb->task);
523	}
524
525	bdi_debug_register(bdi, dev_name(dev));	331	bdi_debug_register(bdi, dev_name(dev));
526	set_bit(BDI_registered, &bdi->state);	332	set_bit(BDI_registered, &bdi->state);
527		333
@@ -545,8 +351,6 @@ EXPORT_SYMBOL(bdi_register_dev);
545	*/	351	*/
546	static void bdi_wb_shutdown(struct backing_dev_info *bdi)	352	static void bdi_wb_shutdown(struct backing_dev_info *bdi)
547	{	353	{
548	struct task_struct *task;
549
550	if (!bdi_cap_writeback_dirty(bdi))	354	if (!bdi_cap_writeback_dirty(bdi))
551	return;	355	return;
552		356
@@ -556,22 +360,20 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
556	bdi_remove_from_list(bdi);	360	bdi_remove_from_list(bdi);
557		361
558	/*	362	/*
559	* If setup is pending, wait for that to complete first	363	* Drain work list and shutdown the delayed_work. At this point,
		364	* @bdi->bdi_list is empty telling bdi_writeback_workfn() that @bdi
		365	* is dying and its work_list needs to be drained no matter what.
560	*/	366	*/
561	wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,	367	mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
562	TASK_UNINTERRUPTIBLE);	368	flush_delayed_work(&bdi->wb.dwork);
		369	WARN_ON(!list_empty(&bdi->work_list));
563		370
564	/*	371	/*
565	* Finally, kill the kernel thread. We don't need to be RCU	372	* This shouldn't be necessary unless @bdi for some reason has
566	* safe anymore, since the bdi is gone from visibility.	373	* unflushed dirty IO after work_list is drained. Do it anyway
		374	* just in case.
567	*/	375	*/
568	spin_lock_bh(&bdi->wb_lock);	376	cancel_delayed_work_sync(&bdi->wb.dwork);
569	task = bdi->wb.task;
570	bdi->wb.task = NULL;
571	spin_unlock_bh(&bdi->wb_lock);
572
573	if (task)
574	kthread_stop(task);
575	}	377	}
576		378
577	/*	379	/*
@@ -597,10 +399,8 @@ void bdi_unregister(struct backing_dev_info *bdi)
597	bdi_set_min_ratio(bdi, 0);	399	bdi_set_min_ratio(bdi, 0);
598	trace_writeback_bdi_unregister(bdi);	400	trace_writeback_bdi_unregister(bdi);
599	bdi_prune_sb(bdi);	401	bdi_prune_sb(bdi);
600	del_timer_sync(&bdi->wb.wakeup_timer);
601		402
602	if (!bdi_cap_flush_forker(bdi))	403	bdi_wb_shutdown(bdi);
603	bdi_wb_shutdown(bdi);
604	bdi_debug_unregister(bdi);	404	bdi_debug_unregister(bdi);
605		405
606	spin_lock_bh(&bdi->wb_lock);	406	spin_lock_bh(&bdi->wb_lock);
@@ -622,7 +422,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
622	INIT_LIST_HEAD(&wb->b_io);	422	INIT_LIST_HEAD(&wb->b_io);
623	INIT_LIST_HEAD(&wb->b_more_io);	423	INIT_LIST_HEAD(&wb->b_more_io);
624	spin_lock_init(&wb->list_lock);	424	spin_lock_init(&wb->list_lock);
625	setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi);	425	INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn);
626	}	426	}
627		427
628	/*	428	/*
@@ -695,12 +495,11 @@ void bdi_destroy(struct backing_dev_info *bdi)
695	bdi_unregister(bdi);	495	bdi_unregister(bdi);
696		496
697	/*	497	/*
698	* If bdi_unregister() had already been called earlier, the	498	* If bdi_unregister() had already been called earlier, the dwork
699	* wakeup_timer could still be armed because bdi_prune_sb()	499	* could still be pending because bdi_prune_sb() can race with the
700	* can race with the bdi_wakeup_thread_delayed() calls from	500	* bdi_wakeup_thread_delayed() calls from __mark_inode_dirty().
701	* __mark_inode_dirty().
702	*/	501	*/
703	del_timer_sync(&bdi->wb.wakeup_timer);	502	cancel_delayed_work_sync(&bdi->wb.dwork);
704		503
705	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)	504	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
706	percpu_counter_destroy(&bdi->bdi_stat[i]);	505	percpu_counter_destroy(&bdi->bdi_stat[i]);


diff --git a/mm/bounce.c b/mm/bounce.c index a5c2ec3589cb..c9f0a4339a7d 100644 --- a/mm/bounce.c +++ b/mm/bounce.c
@@ -101,7 +101,7 @@ static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
101	struct bio_vec *tovec, *fromvec;	101	struct bio_vec *tovec, *fromvec;
102	int i;	102	int i;
103		103
104	__bio_for_each_segment(tovec, to, i, 0) {	104	bio_for_each_segment(tovec, to, i) {
105	fromvec = from->bi_io_vec + i;	105	fromvec = from->bi_io_vec + i;
106		106
107	/*	107	/*
@@ -134,7 +134,7 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
134	/*	134	/*
135	* free up bounce indirect pages used	135	* free up bounce indirect pages used
136	*/	136	*/
137	__bio_for_each_segment(bvec, bio, i, 0) {	137	bio_for_each_segment_all(bvec, bio, i) {
138	org_vec = bio_orig->bi_io_vec + i;	138	org_vec = bio_orig->bi_io_vec + i;
139	if (bvec->bv_page == org_vec->bv_page)	139	if (bvec->bv_page == org_vec->bv_page)
140	continue;	140	continue;
@@ -199,78 +199,43 @@ static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio)
199	static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,	199	static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
200	mempool_t *pool, int force)	200	mempool_t *pool, int force)
201	{	201	{
202	struct page *page;	202	struct bio *bio;
203	struct bio *bio = NULL;	203	int rw = bio_data_dir(*bio_orig);
204	int i, rw = bio_data_dir(*bio_orig);
205	struct bio_vec *to, *from;	204	struct bio_vec *to, *from;
		205	unsigned i;
206		206
207	bio_for_each_segment(from, *bio_orig, i) {	207	bio_for_each_segment(from, *bio_orig, i)
208	page = from->bv_page;	208	if (page_to_pfn(from->bv_page) > queue_bounce_pfn(q))
		209	goto bounce;
209		210
210	/*	211	return;
211	* is destination page below bounce pfn?	212	bounce:
212	*/	213	bio = bio_clone_bioset(*bio_orig, GFP_NOIO, fs_bio_set);
213	if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force)
214	continue;
215
216	/*
217	* irk, bounce it
218	*/
219	if (!bio) {
220	unsigned int cnt = (*bio_orig)->bi_vcnt;
221		214
222	bio = bio_alloc(GFP_NOIO, cnt);	215	bio_for_each_segment_all(to, bio, i) {
223	memset(bio->bi_io_vec, 0, cnt * sizeof(struct bio_vec));	216	struct page *page = to->bv_page;
224	}
225
226		217
227	to = bio->bi_io_vec + i;	218	if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force)
		219	continue;
228		220
229	to->bv_page = mempool_alloc(pool, q->bounce_gfp);
230	to->bv_len = from->bv_len;
231	to->bv_offset = from->bv_offset;
232	inc_zone_page_state(to->bv_page, NR_BOUNCE);	221	inc_zone_page_state(to->bv_page, NR_BOUNCE);
		222	to->bv_page = mempool_alloc(pool, q->bounce_gfp);
233		223
234	if (rw == WRITE) {	224	if (rw == WRITE) {
235	char *vto, *vfrom;	225	char *vto, *vfrom;
236		226
237	flush_dcache_page(from->bv_page);	227	flush_dcache_page(page);
		228
238	vto = page_address(to->bv_page) + to->bv_offset;	229	vto = page_address(to->bv_page) + to->bv_offset;
239	vfrom = kmap(from->bv_page) + from->bv_offset;	230	vfrom = kmap_atomic(page) + to->bv_offset;
240	memcpy(vto, vfrom, to->bv_len);	231	memcpy(vto, vfrom, to->bv_len);
241	kunmap(from->bv_page);	232	kunmap_atomic(vfrom);
242	}	233	}
243	}	234	}
244		235
245	/*
246	* no pages bounced
247	*/
248	if (!bio)
249	return;
250
251	trace_block_bio_bounce(q, *bio_orig);	236	trace_block_bio_bounce(q, *bio_orig);
252		237
253	/*
254	* at least one page was bounced, fill in possible non-highmem
255	* pages
256	*/
257	__bio_for_each_segment(from, *bio_orig, i, 0) {
258	to = bio_iovec_idx(bio, i);
259	if (!to->bv_page) {
260	to->bv_page = from->bv_page;
261	to->bv_len = from->bv_len;
262	to->bv_offset = from->bv_offset;
263	}
264	}
265
266	bio->bi_bdev = (*bio_orig)->bi_bdev;
267	bio->bi_flags |= (1 << BIO_BOUNCED);	238	bio->bi_flags |= (1 << BIO_BOUNCED);
268	bio->bi_sector = (*bio_orig)->bi_sector;
269	bio->bi_rw = (*bio_orig)->bi_rw;
270
271	bio->bi_vcnt = (*bio_orig)->bi_vcnt;
272	bio->bi_idx = (*bio_orig)->bi_idx;
273	bio->bi_size = (*bio_orig)->bi_size;
274		239
275	if (pool == page_pool) {	240	if (pool == page_pool) {
276	bio->bi_end_io = bounce_end_io_write;	241	bio->bi_end_io = bounce_end_io_write;


diff --git a/mm/page_io.c b/mm/page_io.c index 06a8842a6ec6..a8a3ef45fed7 100644 --- a/mm/page_io.c +++ b/mm/page_io.c
@@ -36,7 +36,6 @@ static struct bio *get_swap_bio(gfp_t gfp_flags,
36	bio->bi_io_vec[0].bv_len = PAGE_SIZE;	36	bio->bi_io_vec[0].bv_len = PAGE_SIZE;
37	bio->bi_io_vec[0].bv_offset = 0;	37	bio->bi_io_vec[0].bv_offset = 0;
38	bio->bi_vcnt = 1;	38	bio->bi_vcnt = 1;
39	bio->bi_idx = 0;
40	bio->bi_size = PAGE_SIZE;	39	bio->bi_size = PAGE_SIZE;
41	bio->bi_end_io = end_io;	40	bio->bi_end_io = end_io;
42	}	41	}