path: root/mm
author     Tejun Heo <tj@kernel.org>    2013-04-01 22:08:06 -0400
committer  Tejun Heo <tj@kernel.org>    2013-04-01 22:08:06 -0400
commit     839a8e8660b6777e7fe4e80af1a048aebe2b5977 (patch)
tree       80398cd4dd8ebc4c51be20725c0cc427bfe321b3 /mm
parent     181387da2d64c3129e5b5186c4dd388bc5041d53 (diff)
writeback: replace custom worker pool implementation with unbound workqueue
Writeback implements its own worker pool - each bdi can be associated with a worker thread which is created and destroyed dynamically. The worker thread for the default bdi is always present and serves as the "forker" thread, which forks off worker threads for other bdis.

There's no reason for writeback to implement its own worker pool when using an unbound workqueue instead is much simpler and more efficient. This patch replaces the custom worker pool implementation in writeback with an unbound workqueue.

The conversion isn't too complicated, but the following points are worth mentioning.

* bdi_writeback->last_active, task and wakeup_timer are removed. delayed_work ->dwork is added instead. Explicit timer handling is no longer necessary. Everything works by either queueing / modding / flushing / canceling the delayed_work item.

* bdi_writeback_thread() becomes bdi_writeback_workfn(), which runs off bdi_writeback->dwork. On each execution, it processes bdi->work_list and reschedules itself if there are more things to do. The function also handles the low-memory condition, which used to be handled by the forker thread. If the function is running off a rescuer thread, it only writes out a limited number of pages so that the rescuer can serve other bdis too. This preserves the flusher creation failure behavior of the forker thread.

* INIT_LIST_HEAD(&bdi->bdi_list) is used to tell bdi_writeback_workfn() about ongoing bdi unregistration so that it always drains work_list even if it's running off the rescuer. Note that the original code was broken in this regard: under memory pressure, a bdi could finish unregistration with a non-empty work_list.

* The default bdi is no longer special. It is now treated the same as any other bdi, and bdi_cap_flush_forker() is removed.

* BDI_pending is no longer used. Removed.

* Some tracepoints become non-applicable. The following TPs are removed: writeback_nothread, writeback_wake_thread, writeback_wake_forker_thread, writeback_thread_start, writeback_thread_stop.

Everything, including devices coming and going and rescuer operation under simulated memory pressure, seems to work fine in my test setup.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
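For readers less familiar with the workqueue API, the mm/ side of the conversion boils down to a handful of delayed_work calls, all of which appear in the diff below. The following condensed sketch shows how they fit together; the sketch_* wrappers are illustrative only, field and function names are taken from the patch itself, and locking, statistics and error paths are elided.

	/* One unbound, memory-reclaim-safe workqueue serves every bdi. */
	struct workqueue_struct *bdi_wq;

	static int __init sketch_default_bdi_init(void)
	{
		bdi_wq = alloc_workqueue("writeback",
					 WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND, 0);
		return bdi_wq ? 0 : -ENOMEM;
	}

	static void sketch_bdi_wb_init(struct bdi_writeback *wb)
	{
		/* dwork replaces wakeup_timer, task and last_active. */
		INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn);
	}

	static void sketch_bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)
	{
		unsigned long timeout = msecs_to_jiffies(dirty_writeback_interval * 10);

		/* Arm (or shorten) the pending work item; no timer, no forker thread. */
		mod_delayed_work(bdi_wq, &bdi->wb.dwork, timeout);
	}

	static void sketch_bdi_wb_shutdown(struct backing_dev_info *bdi)
	{
		/* An emptied bdi_list tells the workfn that @bdi is going away. */
		INIT_LIST_HEAD(&bdi->bdi_list);

		mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);	/* run it now */
		flush_delayed_work(&bdi->wb.dwork);		/* wait for it to finish */
		cancel_delayed_work_sync(&bdi->wb.dwork);	/* just in case it got re-queued */
	}

WQ_MEM_RECLAIM guarantees a rescuer thread so writeback can make forward progress under memory pressure, WQ_UNBOUND keeps work items from being pinned to the submitting CPU, and WQ_FREEZABLE lets the queue be frozen across suspend.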
Diffstat (limited to 'mm')
-rw-r--r--  mm/backing-dev.c  255
1 file changed, 28 insertions(+), 227 deletions(-)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 657569b3fcf6..2857d4f6bca4 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -37,6 +37,9 @@ static struct class *bdi_class;
 DEFINE_SPINLOCK(bdi_lock);
 LIST_HEAD(bdi_list);
 
+/* bdi_wq serves all asynchronous writeback tasks */
+struct workqueue_struct *bdi_wq;
+
 void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
 {
 	if (wb1 < wb2) {
@@ -255,6 +258,11 @@ static int __init default_bdi_init(void)
 {
 	int err;
 
+	bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE |
+					      WQ_UNBOUND, 0);
+	if (!bdi_wq)
+		return -ENOMEM;
+
 	err = bdi_init(&default_backing_dev_info);
 	if (!err)
 		bdi_register(&default_backing_dev_info, NULL, "default");
@@ -269,26 +277,6 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
 	return wb_has_dirty_io(&bdi->wb);
 }
 
-static void wakeup_timer_fn(unsigned long data)
-{
-	struct backing_dev_info *bdi = (struct backing_dev_info *)data;
-
-	spin_lock_bh(&bdi->wb_lock);
-	if (bdi->wb.task) {
-		trace_writeback_wake_thread(bdi);
-		wake_up_process(bdi->wb.task);
-	} else if (bdi->dev) {
-		/*
-		 * When bdi tasks are inactive for long time, they are killed.
-		 * In this case we have to wake-up the forker thread which
-		 * should create and run the bdi thread.
-		 */
-		trace_writeback_wake_forker_thread(bdi);
-		wake_up_process(default_backing_dev_info.wb.task);
-	}
-	spin_unlock_bh(&bdi->wb_lock);
-}
-
 /*
  * This function is used when the first inode for this bdi is marked dirty. It
  * wakes-up the corresponding bdi thread which should then take care of the
@@ -305,176 +293,7 @@ void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)
 	unsigned long timeout;
 
 	timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
-	mod_timer(&bdi->wb.wakeup_timer, jiffies + timeout);
-}
-
-/*
- * Calculate the longest interval (jiffies) bdi threads are allowed to be
- * inactive.
- */
-static unsigned long bdi_longest_inactive(void)
-{
-	unsigned long interval;
-
-	interval = msecs_to_jiffies(dirty_writeback_interval * 10);
-	return max(5UL * 60 * HZ, interval);
-}
-
-/*
- * Clear pending bit and wakeup anybody waiting for flusher thread creation or
- * shutdown
- */
-static void bdi_clear_pending(struct backing_dev_info *bdi)
-{
-	clear_bit(BDI_pending, &bdi->state);
-	smp_mb__after_clear_bit();
-	wake_up_bit(&bdi->state, BDI_pending);
-}
-
-static int bdi_forker_thread(void *ptr)
-{
-	struct bdi_writeback *me = ptr;
-
-	current->flags |= PF_SWAPWRITE;
-	set_freezable();
-
-	/*
-	 * Our parent may run at a different priority, just set us to normal
-	 */
-	set_user_nice(current, 0);
-
-	for (;;) {
-		struct task_struct *task = NULL;
-		struct backing_dev_info *bdi;
-		enum {
-			NO_ACTION,   /* Nothing to do */
-			FORK_THREAD, /* Fork bdi thread */
-			KILL_THREAD, /* Kill inactive bdi thread */
-		} action = NO_ACTION;
-
-		/*
-		 * Temporary measure, we want to make sure we don't see
-		 * dirty data on the default backing_dev_info
-		 */
-		if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) {
-			del_timer(&me->wakeup_timer);
-			wb_do_writeback(me, 0);
-		}
-
-		spin_lock_bh(&bdi_lock);
-		/*
-		 * In the following loop we are going to check whether we have
-		 * some work to do without any synchronization with tasks
-		 * waking us up to do work for them. Set the task state here
-		 * so that we don't miss wakeups after verifying conditions.
-		 */
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		list_for_each_entry(bdi, &bdi_list, bdi_list) {
-			bool have_dirty_io;
-
-			if (!bdi_cap_writeback_dirty(bdi) ||
-			    bdi_cap_flush_forker(bdi))
-				continue;
-
-			WARN(!test_bit(BDI_registered, &bdi->state),
-			     "bdi %p/%s is not registered!\n", bdi, bdi->name);
-
-			have_dirty_io = !list_empty(&bdi->work_list) ||
-					wb_has_dirty_io(&bdi->wb);
-
-			/*
-			 * If the bdi has work to do, but the thread does not
-			 * exist - create it.
-			 */
-			if (!bdi->wb.task && have_dirty_io) {
-				/*
-				 * Set the pending bit - if someone will try to
-				 * unregister this bdi - it'll wait on this bit.
-				 */
-				set_bit(BDI_pending, &bdi->state);
-				action = FORK_THREAD;
-				break;
-			}
-
-			spin_lock(&bdi->wb_lock);
-
-			/*
-			 * If there is no work to do and the bdi thread was
-			 * inactive long enough - kill it. The wb_lock is taken
-			 * to make sure no-one adds more work to this bdi and
-			 * wakes the bdi thread up.
-			 */
-			if (bdi->wb.task && !have_dirty_io &&
-			    time_after(jiffies, bdi->wb.last_active +
-						bdi_longest_inactive())) {
-				task = bdi->wb.task;
-				bdi->wb.task = NULL;
-				spin_unlock(&bdi->wb_lock);
-				set_bit(BDI_pending, &bdi->state);
-				action = KILL_THREAD;
-				break;
-			}
-			spin_unlock(&bdi->wb_lock);
-		}
-		spin_unlock_bh(&bdi_lock);
-
-		/* Keep working if default bdi still has things to do */
-		if (!list_empty(&me->bdi->work_list))
-			__set_current_state(TASK_RUNNING);
-
-		switch (action) {
-		case FORK_THREAD:
-			__set_current_state(TASK_RUNNING);
-			task = kthread_create(bdi_writeback_thread, &bdi->wb,
-					      "flush-%s", dev_name(bdi->dev));
-			if (IS_ERR(task)) {
-				/*
-				 * If thread creation fails, force writeout of
-				 * the bdi from the thread. Hopefully 1024 is
-				 * large enough for efficient IO.
-				 */
-				writeback_inodes_wb(&bdi->wb, 1024,
-						    WB_REASON_FORKER_THREAD);
-			} else {
-				/*
-				 * The spinlock makes sure we do not lose
-				 * wake-ups when racing with 'bdi_queue_work()'.
-				 * And as soon as the bdi thread is visible, we
-				 * can start it.
-				 */
-				spin_lock_bh(&bdi->wb_lock);
-				bdi->wb.task = task;
-				spin_unlock_bh(&bdi->wb_lock);
-				wake_up_process(task);
-			}
-			bdi_clear_pending(bdi);
-			break;
-
-		case KILL_THREAD:
-			__set_current_state(TASK_RUNNING);
-			kthread_stop(task);
-			bdi_clear_pending(bdi);
-			break;
-
-		case NO_ACTION:
-			if (!wb_has_dirty_io(me) || !dirty_writeback_interval)
-				/*
-				 * There are no dirty data. The only thing we
-				 * should now care about is checking for
-				 * inactive bdi threads and killing them. Thus,
-				 * let's sleep for longer time, save energy and
-				 * be friendly for battery-driven devices.
-				 */
-				schedule_timeout(bdi_longest_inactive());
-			else
-				schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
-			try_to_freeze();
-			break;
-		}
-	}
-
-	return 0;
+	mod_delayed_work(bdi_wq, &bdi->wb.dwork, timeout);
 }
 
 /*
@@ -487,6 +306,9 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi)
 	spin_unlock_bh(&bdi_lock);
 
 	synchronize_rcu_expedited();
+
+	/* bdi_list is now unused, clear it to mark @bdi dying */
+	INIT_LIST_HEAD(&bdi->bdi_list);
 }
 
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
@@ -506,20 +328,6 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 
 	bdi->dev = dev;
 
-	/*
-	 * Just start the forker thread for our default backing_dev_info,
-	 * and add other bdi's to the list. They will get a thread created
-	 * on-demand when they need it.
-	 */
-	if (bdi_cap_flush_forker(bdi)) {
-		struct bdi_writeback *wb = &bdi->wb;
-
-		wb->task = kthread_run(bdi_forker_thread, wb, "bdi-%s",
-				       dev_name(dev));
-		if (IS_ERR(wb->task))
-			return PTR_ERR(wb->task);
-	}
-
 	bdi_debug_register(bdi, dev_name(dev));
 	set_bit(BDI_registered, &bdi->state);
 
@@ -543,8 +351,6 @@ EXPORT_SYMBOL(bdi_register_dev);
  */
 static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 {
-	struct task_struct *task;
-
 	if (!bdi_cap_writeback_dirty(bdi))
 		return;
 
@@ -554,22 +360,20 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 	bdi_remove_from_list(bdi);
 
 	/*
-	 * If setup is pending, wait for that to complete first
+	 * Drain work list and shutdown the delayed_work.  At this point,
+	 * @bdi->bdi_list is empty telling bdi_writeback_workfn() that @bdi
+	 * is dying and its work_list needs to be drained no matter what.
 	 */
-	wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
-			TASK_UNINTERRUPTIBLE);
+	mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
+	flush_delayed_work(&bdi->wb.dwork);
+	WARN_ON(!list_empty(&bdi->work_list));
 
 	/*
-	 * Finally, kill the kernel thread. We don't need to be RCU
-	 * safe anymore, since the bdi is gone from visibility.
+	 * This shouldn't be necessary unless @bdi for some reason has
+	 * unflushed dirty IO after work_list is drained.  Do it anyway
+	 * just in case.
 	 */
-	spin_lock_bh(&bdi->wb_lock);
-	task = bdi->wb.task;
-	bdi->wb.task = NULL;
-	spin_unlock_bh(&bdi->wb_lock);
-
-	if (task)
-		kthread_stop(task);
+	cancel_delayed_work_sync(&bdi->wb.dwork);
 }
 
 /*
@@ -595,10 +399,8 @@ void bdi_unregister(struct backing_dev_info *bdi)
 	bdi_set_min_ratio(bdi, 0);
 	trace_writeback_bdi_unregister(bdi);
 	bdi_prune_sb(bdi);
-	del_timer_sync(&bdi->wb.wakeup_timer);
 
-	if (!bdi_cap_flush_forker(bdi))
-		bdi_wb_shutdown(bdi);
+	bdi_wb_shutdown(bdi);
 	bdi_debug_unregister(bdi);
 
 	spin_lock_bh(&bdi->wb_lock);
@@ -620,7 +422,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
 	INIT_LIST_HEAD(&wb->b_io);
 	INIT_LIST_HEAD(&wb->b_more_io);
 	spin_lock_init(&wb->list_lock);
-	setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi);
+	INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn);
 }
 
 /*
@@ -693,12 +495,11 @@ void bdi_destroy(struct backing_dev_info *bdi)
 	bdi_unregister(bdi);
 
 	/*
-	 * If bdi_unregister() had already been called earlier, the
-	 * wakeup_timer could still be armed because bdi_prune_sb()
-	 * can race with the bdi_wakeup_thread_delayed() calls from
-	 * __mark_inode_dirty().
+	 * If bdi_unregister() had already been called earlier, the dwork
+	 * could still be pending because bdi_prune_sb() can race with the
+	 * bdi_wakeup_thread_delayed() calls from __mark_inode_dirty().
 	 */
-	del_timer_sync(&bdi->wb.wakeup_timer);
+	cancel_delayed_work_sync(&bdi->wb.dwork);
 
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
 		percpu_counter_destroy(&bdi->bdi_stat[i]);
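The self-rescheduling and rescuer handling that the commit message describes live in bdi_writeback_workfn() in fs/fs-writeback.c, which is outside this mm-limited diff. The sketch below only illustrates that described behavior and is not the actual hunk from that file; helpers such as wb_do_writeback() and current_is_workqueue_rescuer() are assumed from the contemporary tree.

	void bdi_writeback_workfn(struct work_struct *work)
	{
		struct bdi_writeback *wb = container_of(to_delayed_work(work),
							struct bdi_writeback, dwork);
		struct backing_dev_info *bdi = wb->bdi;

		if (likely(!current_is_workqueue_rescuer() ||
			   list_empty(&bdi->bdi_list))) {
			/*
			 * Normal path, or @bdi is being unregistered (signalled
			 * by the reinitialized bdi_list): drain work_list fully.
			 */
			do {
				wb_do_writeback(wb, 0);
			} while (!list_empty(&bdi->work_list));
		} else {
			/*
			 * Running off the rescuer under memory pressure: write
			 * out a bounded number of pages so the rescuer can go
			 * serve other bdis, mirroring the old forker thread's
			 * creation-failure fallback.
			 */
			writeback_inodes_wb(wb, 1024, WB_REASON_FORKER_THREAD);
		}

		/* Reschedule if there is more queued or dirty work. */
		if (!list_empty(&bdi->work_list) ||
		    (wb_has_dirty_io(wb) && dirty_writeback_interval))
			queue_delayed_work(bdi_wq, &wb->dwork,
				msecs_to_jiffies(dirty_writeback_interval * 10));
	}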