-rw-r--r--  fs/fs-writeback.c                 102
-rw-r--r--  include/linux/backing-dev.h        15
-rw-r--r--  include/trace/events/writeback.h    5
-rw-r--r--  mm/backing-dev.c                  255
4 files changed, 65 insertions, 312 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 21f46fb3a101..8067d3719e94 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -22,7 +22,6 @@
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/kthread.h>
-#include <linux/freezer.h>
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
@@ -88,20 +87,6 @@ static inline struct inode *wb_inode(struct list_head *head)
 #define CREATE_TRACE_POINTS
 #include <trace/events/writeback.h>
 
-/* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */
-static void bdi_wakeup_flusher(struct backing_dev_info *bdi)
-{
-	if (bdi->wb.task) {
-		wake_up_process(bdi->wb.task);
-	} else {
-		/*
-		 * The bdi thread isn't there, wake up the forker thread which
-		 * will create and run it.
-		 */
-		wake_up_process(default_backing_dev_info.wb.task);
-	}
-}
-
 static void bdi_queue_work(struct backing_dev_info *bdi,
 			   struct wb_writeback_work *work)
 {
@@ -109,10 +94,9 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
 
 	spin_lock_bh(&bdi->wb_lock);
 	list_add_tail(&work->list, &bdi->work_list);
-	if (!bdi->wb.task)
-		trace_writeback_nothread(bdi, work);
-	bdi_wakeup_flusher(bdi);
 	spin_unlock_bh(&bdi->wb_lock);
+
+	mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
 }
 
 static void
@@ -127,10 +111,8 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 	 */
 	work = kzalloc(sizeof(*work), GFP_ATOMIC);
 	if (!work) {
-		if (bdi->wb.task) {
-			trace_writeback_nowork(bdi);
-			wake_up_process(bdi->wb.task);
-		}
+		trace_writeback_nowork(bdi);
+		mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
 		return;
 	}
 
@@ -177,9 +159,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi)
 	 * writeback as soon as there is no other work to do.
 	 */
 	trace_writeback_wake_background(bdi);
-	spin_lock_bh(&bdi->wb_lock);
-	bdi_wakeup_flusher(bdi);
-	spin_unlock_bh(&bdi->wb_lock);
+	mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
 }
 
 /*
@@ -1020,66 +1000,48 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 
 /*
  * Handle writeback of dirty data for the device backed by this bdi. Also
- * wakes up periodically and does kupdated style flushing.
+ * reschedules periodically and does kupdated style flushing.
  */
-int bdi_writeback_thread(void *data)
+void bdi_writeback_workfn(struct work_struct *work)
 {
-	struct bdi_writeback *wb = data;
+	struct bdi_writeback *wb = container_of(to_delayed_work(work),
+						struct bdi_writeback, dwork);
 	struct backing_dev_info *bdi = wb->bdi;
 	long pages_written;
 
 	current->flags |= PF_SWAPWRITE;
-	set_freezable();
-	wb->last_active = jiffies;
-
-	/*
-	 * Our parent may run at a different priority, just set us to normal
-	 */
-	set_user_nice(current, 0);
-
-	trace_writeback_thread_start(bdi);
 
-	while (!kthread_freezable_should_stop(NULL)) {
+	if (likely(!current_is_workqueue_rescuer() ||
+		   list_empty(&bdi->bdi_list))) {
 		/*
-		 * Remove own delayed wake-up timer, since we are already awake
-		 * and we'll take care of the periodic write-back.
+		 * The normal path. Keep writing back @bdi until its
+		 * work_list is empty. Note that this path is also taken
+		 * if @bdi is shutting down even when we're running off the
+		 * rescuer as work_list needs to be drained.
 		 */
-		del_timer(&wb->wakeup_timer);
-
-		pages_written = wb_do_writeback(wb, 0);
-
+		do {
+			pages_written = wb_do_writeback(wb, 0);
+			trace_writeback_pages_written(pages_written);
+		} while (!list_empty(&bdi->work_list));
+	} else {
+		/*
+		 * bdi_wq can't get enough workers and we're running off
+		 * the emergency worker. Don't hog it. Hopefully, 1024 is
+		 * enough for efficient IO.
+		 */
+		pages_written = writeback_inodes_wb(&bdi->wb, 1024,
+						    WB_REASON_FORKER_THREAD);
 		trace_writeback_pages_written(pages_written);
-
-		if (pages_written)
-			wb->last_active = jiffies;
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (!list_empty(&bdi->work_list) || kthread_should_stop()) {
-			__set_current_state(TASK_RUNNING);
-			continue;
-		}
-
-		if (wb_has_dirty_io(wb) && dirty_writeback_interval)
-			schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
-		else {
-			/*
-			 * We have nothing to do, so can go sleep without any
-			 * timeout and save power. When a work is queued or
-			 * something is made dirty - we will be woken up.
-			 */
-			schedule();
-		}
 	}
 
-	/* Flush any work that raced with us exiting */
-	if (!list_empty(&bdi->work_list))
-		wb_do_writeback(wb, 1);
+	if (!list_empty(&bdi->work_list) ||
+	    (wb_has_dirty_io(wb) && dirty_writeback_interval))
+		queue_delayed_work(bdi_wq, &wb->dwork,
+			msecs_to_jiffies(dirty_writeback_interval * 10));
 
-	trace_writeback_thread_stop(bdi);
-	return 0;
+	current->flags &= ~PF_SWAPWRITE;
 }
 
-
 /*
  * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
  * the whole world.
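
The hunks above replace the per-bdi kthread loop with a self-rearming delayed_work. A minimal sketch of that pattern follows; it is a hypothetical demo module (demo_flusher, demo_workfn and the backlog counter are made-up names, not part of the patch): the work item is embedded in its owning structure, recovered with container_of(to_delayed_work(work), ...), and conditionally re-queued at the end of the work function, the same shape bdi_writeback_workfn now has.

/* Hypothetical demo of the pattern used by bdi_writeback_workfn() above:
 * an embedded delayed_work whose work function recovers its container and
 * re-arms itself while there is still work left. */
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/atomic.h>

struct demo_flusher {
	struct delayed_work	dwork;	/* plays the role of wb->dwork */
	atomic_t		todo;	/* pretend backlog */
};

static struct demo_flusher demo;

static void demo_workfn(struct work_struct *work)
{
	/* Same recovery step as bdi_writeback_workfn(). */
	struct demo_flusher *f = container_of(to_delayed_work(work),
					      struct demo_flusher, dwork);

	pr_info("demo_flusher: %d items left\n", atomic_read(&f->todo));

	/* Re-arm only while there is still something to do, like the
	 * queue_delayed_work() at the end of bdi_writeback_workfn(). */
	if (atomic_dec_return(&f->todo) > 0)
		queue_delayed_work(system_unbound_wq, &f->dwork,
				   msecs_to_jiffies(100));
}

static int __init demo_init(void)
{
	atomic_set(&demo.todo, 5);
	INIT_DELAYED_WORK(&demo.dwork, demo_workfn);
	queue_delayed_work(system_unbound_wq, &demo.dwork, 0);
	return 0;
}

static void __exit demo_exit(void)
{
	cancel_delayed_work_sync(&demo.dwork);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");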
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index a5ef27f5411a..c3881553f7d1 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -18,6 +18,7 @@
 #include <linux/writeback.h>
 #include <linux/atomic.h>
 #include <linux/sysctl.h>
+#include <linux/workqueue.h>
 
 struct page;
 struct device;
@@ -27,7 +28,6 @@ struct dentry;
  * Bits in backing_dev_info.state
  */
 enum bdi_state {
-	BDI_pending,		/* On its way to being activated */
 	BDI_wb_alloc,		/* Default embedded wb allocated */
 	BDI_async_congested,	/* The async (write) queue is getting full */
 	BDI_sync_congested,	/* The sync queue is getting full */
@@ -53,10 +53,8 @@ struct bdi_writeback {
 	unsigned int nr;
 
 	unsigned long last_old_flush;	/* last old data flush */
-	unsigned long last_active;	/* last time bdi thread was active */
 
-	struct task_struct *task;	/* writeback thread */
-	struct timer_list wakeup_timer; /* used for delayed bdi thread wakeup */
+	struct delayed_work dwork;	/* work item used for writeback */
 	struct list_head b_dirty;	/* dirty inodes */
 	struct list_head b_io;		/* parked for writeback */
 	struct list_head b_more_io;	/* parked for more writeback */
@@ -123,7 +121,7 @@ int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
 void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 			enum wb_reason reason);
 void bdi_start_background_writeback(struct backing_dev_info *bdi);
-int bdi_writeback_thread(void *data);
+void bdi_writeback_workfn(struct work_struct *work);
 int bdi_has_dirty_io(struct backing_dev_info *bdi);
 void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi);
 void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2);
@@ -131,6 +129,8 @@ void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2);
 extern spinlock_t bdi_lock;
 extern struct list_head bdi_list;
 
+extern struct workqueue_struct *bdi_wq;
+
 static inline int wb_has_dirty_io(struct bdi_writeback *wb)
 {
 	return !list_empty(&wb->b_dirty) ||
@@ -335,11 +335,6 @@ static inline bool bdi_cap_swap_backed(struct backing_dev_info *bdi)
 	return bdi->capabilities & BDI_CAP_SWAP_BACKED;
 }
 
-static inline bool bdi_cap_flush_forker(struct backing_dev_info *bdi)
-{
-	return bdi == &default_backing_dev_info;
-}
-
 static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
 {
 	return bdi_cap_writeback_dirty(mapping->backing_dev_info);
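
The header change swaps three fields (task, wakeup_timer, last_active) for a single delayed_work and exposes the shared bdi_wq workqueue. The behavioral distinction the fs-writeback.c hunks rely on is queue_delayed_work() versus mod_delayed_work(); the sketch below spells it out with hypothetical helper names (flusher_kick_now/flusher_kick_later are not part of the patch).

/* Hypothetical helpers contrasting the two queueing calls the patch uses. */
#include <linux/workqueue.h>
#include <linux/jiffies.h>

/* Immediate kick, as bdi_queue_work() and bdi_start_background_writeback()
 * now do: mod_delayed_work() (re)sets the timer even if the work is already
 * pending, so a zero delay makes it run as soon as a worker is free. */
static inline void flusher_kick_now(struct workqueue_struct *wq,
				    struct delayed_work *dwork)
{
	mod_delayed_work(wq, dwork, 0);
}

/* Deferred re-arm, as at the end of bdi_writeback_workfn():
 * queue_delayed_work() is a no-op while the work is already pending, so it
 * never postpones a run that is already scheduled to happen sooner. */
static inline void flusher_kick_later(struct workqueue_struct *wq,
				      struct delayed_work *dwork,
				      unsigned int msecs)
{
	queue_delayed_work(wq, dwork, msecs_to_jiffies(msecs));
}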
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 6a16fd2e70ed..464ea82e10db 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -183,7 +183,6 @@ DECLARE_EVENT_CLASS(writeback_work_class,
 DEFINE_EVENT(writeback_work_class, name, \
 	TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), \
 	TP_ARGS(bdi, work))
-DEFINE_WRITEBACK_WORK_EVENT(writeback_nothread);
 DEFINE_WRITEBACK_WORK_EVENT(writeback_queue);
 DEFINE_WRITEBACK_WORK_EVENT(writeback_exec);
 DEFINE_WRITEBACK_WORK_EVENT(writeback_start);
@@ -222,12 +221,8 @@ DEFINE_EVENT(writeback_class, name, \
 
 DEFINE_WRITEBACK_EVENT(writeback_nowork);
 DEFINE_WRITEBACK_EVENT(writeback_wake_background);
-DEFINE_WRITEBACK_EVENT(writeback_wake_thread);
-DEFINE_WRITEBACK_EVENT(writeback_wake_forker_thread);
 DEFINE_WRITEBACK_EVENT(writeback_bdi_register);
 DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister);
-DEFINE_WRITEBACK_EVENT(writeback_thread_start);
-DEFINE_WRITEBACK_EVENT(writeback_thread_stop);
 
 DECLARE_EVENT_CLASS(wbc_class,
 	TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi),
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 657569b3fcf6..2857d4f6bca4 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -37,6 +37,9 @@ static struct class *bdi_class;
 DEFINE_SPINLOCK(bdi_lock);
 LIST_HEAD(bdi_list);
 
+/* bdi_wq serves all asynchronous writeback tasks */
+struct workqueue_struct *bdi_wq;
+
 void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
 {
 	if (wb1 < wb2) {
@@ -255,6 +258,11 @@ static int __init default_bdi_init(void)
 {
 	int err;
 
+	bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE |
+					      WQ_UNBOUND, 0);
+	if (!bdi_wq)
+		return -ENOMEM;
+
 	err = bdi_init(&default_backing_dev_info);
 	if (!err)
 		bdi_register(&default_backing_dev_info, NULL, "default");
@@ -269,26 +277,6 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
 	return wb_has_dirty_io(&bdi->wb);
 }
 
-static void wakeup_timer_fn(unsigned long data)
-{
-	struct backing_dev_info *bdi = (struct backing_dev_info *)data;
-
-	spin_lock_bh(&bdi->wb_lock);
-	if (bdi->wb.task) {
-		trace_writeback_wake_thread(bdi);
-		wake_up_process(bdi->wb.task);
-	} else if (bdi->dev) {
-		/*
-		 * When bdi tasks are inactive for long time, they are killed.
-		 * In this case we have to wake-up the forker thread which
-		 * should create and run the bdi thread.
-		 */
-		trace_writeback_wake_forker_thread(bdi);
-		wake_up_process(default_backing_dev_info.wb.task);
-	}
-	spin_unlock_bh(&bdi->wb_lock);
-}
-
 /*
  * This function is used when the first inode for this bdi is marked dirty. It
  * wakes-up the corresponding bdi thread which should then take care of the
@@ -305,176 +293,7 @@ void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)
 	unsigned long timeout;
 
 	timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
-	mod_timer(&bdi->wb.wakeup_timer, jiffies + timeout);
-}
-
-/*
- * Calculate the longest interval (jiffies) bdi threads are allowed to be
- * inactive.
- */
-static unsigned long bdi_longest_inactive(void)
-{
-	unsigned long interval;
-
-	interval = msecs_to_jiffies(dirty_writeback_interval * 10);
-	return max(5UL * 60 * HZ, interval);
-}
-
-/*
- * Clear pending bit and wakeup anybody waiting for flusher thread creation or
- * shutdown
- */
-static void bdi_clear_pending(struct backing_dev_info *bdi)
-{
-	clear_bit(BDI_pending, &bdi->state);
-	smp_mb__after_clear_bit();
-	wake_up_bit(&bdi->state, BDI_pending);
-}
-
-static int bdi_forker_thread(void *ptr)
-{
-	struct bdi_writeback *me = ptr;
-
-	current->flags |= PF_SWAPWRITE;
-	set_freezable();
-
-	/*
-	 * Our parent may run at a different priority, just set us to normal
-	 */
-	set_user_nice(current, 0);
-
-	for (;;) {
-		struct task_struct *task = NULL;
-		struct backing_dev_info *bdi;
-		enum {
-			NO_ACTION,   /* Nothing to do */
-			FORK_THREAD, /* Fork bdi thread */
-			KILL_THREAD, /* Kill inactive bdi thread */
-		} action = NO_ACTION;
-
-		/*
-		 * Temporary measure, we want to make sure we don't see
-		 * dirty data on the default backing_dev_info
-		 */
-		if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) {
-			del_timer(&me->wakeup_timer);
-			wb_do_writeback(me, 0);
-		}
-
-		spin_lock_bh(&bdi_lock);
-		/*
-		 * In the following loop we are going to check whether we have
-		 * some work to do without any synchronization with tasks
-		 * waking us up to do work for them. Set the task state here
-		 * so that we don't miss wakeups after verifying conditions.
-		 */
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		list_for_each_entry(bdi, &bdi_list, bdi_list) {
-			bool have_dirty_io;
-
-			if (!bdi_cap_writeback_dirty(bdi) ||
-			     bdi_cap_flush_forker(bdi))
-				continue;
-
-			WARN(!test_bit(BDI_registered, &bdi->state),
-			     "bdi %p/%s is not registered!\n", bdi, bdi->name);
-
-			have_dirty_io = !list_empty(&bdi->work_list) ||
-					wb_has_dirty_io(&bdi->wb);
-
-			/*
-			 * If the bdi has work to do, but the thread does not
-			 * exist - create it.
-			 */
-			if (!bdi->wb.task && have_dirty_io) {
-				/*
-				 * Set the pending bit - if someone will try to
-				 * unregister this bdi - it'll wait on this bit.
-				 */
-				set_bit(BDI_pending, &bdi->state);
-				action = FORK_THREAD;
-				break;
-			}
-
-			spin_lock(&bdi->wb_lock);
-
-			/*
-			 * If there is no work to do and the bdi thread was
-			 * inactive long enough - kill it. The wb_lock is taken
-			 * to make sure no-one adds more work to this bdi and
-			 * wakes the bdi thread up.
-			 */
-			if (bdi->wb.task && !have_dirty_io &&
-			    time_after(jiffies, bdi->wb.last_active +
-						bdi_longest_inactive())) {
-				task = bdi->wb.task;
-				bdi->wb.task = NULL;
-				spin_unlock(&bdi->wb_lock);
-				set_bit(BDI_pending, &bdi->state);
-				action = KILL_THREAD;
-				break;
-			}
-			spin_unlock(&bdi->wb_lock);
-		}
-		spin_unlock_bh(&bdi_lock);
-
-		/* Keep working if default bdi still has things to do */
-		if (!list_empty(&me->bdi->work_list))
-			__set_current_state(TASK_RUNNING);
-
-		switch (action) {
-		case FORK_THREAD:
-			__set_current_state(TASK_RUNNING);
-			task = kthread_create(bdi_writeback_thread, &bdi->wb,
-					      "flush-%s", dev_name(bdi->dev));
-			if (IS_ERR(task)) {
-				/*
-				 * If thread creation fails, force writeout of
-				 * the bdi from the thread. Hopefully 1024 is
-				 * large enough for efficient IO.
-				 */
-				writeback_inodes_wb(&bdi->wb, 1024,
-						    WB_REASON_FORKER_THREAD);
-			} else {
-				/*
-				 * The spinlock makes sure we do not lose
-				 * wake-ups when racing with 'bdi_queue_work()'.
-				 * And as soon as the bdi thread is visible, we
-				 * can start it.
-				 */
-				spin_lock_bh(&bdi->wb_lock);
-				bdi->wb.task = task;
-				spin_unlock_bh(&bdi->wb_lock);
-				wake_up_process(task);
-			}
-			bdi_clear_pending(bdi);
-			break;
-
-		case KILL_THREAD:
-			__set_current_state(TASK_RUNNING);
-			kthread_stop(task);
-			bdi_clear_pending(bdi);
-			break;
-
-		case NO_ACTION:
-			if (!wb_has_dirty_io(me) || !dirty_writeback_interval)
-				/*
-				 * There are no dirty data. The only thing we
-				 * should now care about is checking for
-				 * inactive bdi threads and killing them. Thus,
-				 * let's sleep for longer time, save energy and
-				 * be friendly for battery-driven devices.
-				 */
-				schedule_timeout(bdi_longest_inactive());
-			else
-				schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
-			try_to_freeze();
-			break;
-		}
-	}
-
-	return 0;
+	mod_delayed_work(bdi_wq, &bdi->wb.dwork, timeout);
 }
 
 /*
@@ -487,6 +306,9 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi)
 	spin_unlock_bh(&bdi_lock);
 
 	synchronize_rcu_expedited();
+
+	/* bdi_list is now unused, clear it to mark @bdi dying */
+	INIT_LIST_HEAD(&bdi->bdi_list);
 }
 
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
@@ -506,20 +328,6 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 
 	bdi->dev = dev;
 
-	/*
-	 * Just start the forker thread for our default backing_dev_info,
-	 * and add other bdi's to the list. They will get a thread created
-	 * on-demand when they need it.
-	 */
-	if (bdi_cap_flush_forker(bdi)) {
-		struct bdi_writeback *wb = &bdi->wb;
-
-		wb->task = kthread_run(bdi_forker_thread, wb, "bdi-%s",
-				       dev_name(dev));
-		if (IS_ERR(wb->task))
-			return PTR_ERR(wb->task);
-	}
-
 	bdi_debug_register(bdi, dev_name(dev));
 	set_bit(BDI_registered, &bdi->state);
 
@@ -543,8 +351,6 @@ EXPORT_SYMBOL(bdi_register_dev);
  */
 static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 {
-	struct task_struct *task;
-
 	if (!bdi_cap_writeback_dirty(bdi))
 		return;
 
@@ -554,22 +360,20 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 	bdi_remove_from_list(bdi);
 
 	/*
-	 * If setup is pending, wait for that to complete first
+	 * Drain work list and shutdown the delayed_work. At this point,
+	 * @bdi->bdi_list is empty telling bdi_writeback_workfn() that @bdi
+	 * is dying and its work_list needs to be drained no matter what.
 	 */
-	wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
-		    TASK_UNINTERRUPTIBLE);
+	mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
+	flush_delayed_work(&bdi->wb.dwork);
+	WARN_ON(!list_empty(&bdi->work_list));
 
 	/*
-	 * Finally, kill the kernel thread. We don't need to be RCU
-	 * safe anymore, since the bdi is gone from visibility.
+	 * This shouldn't be necessary unless @bdi for some reason has
+	 * unflushed dirty IO after work_list is drained. Do it anyway
+	 * just in case.
 	 */
-	spin_lock_bh(&bdi->wb_lock);
-	task = bdi->wb.task;
-	bdi->wb.task = NULL;
-	spin_unlock_bh(&bdi->wb_lock);
-
-	if (task)
-		kthread_stop(task);
+	cancel_delayed_work_sync(&bdi->wb.dwork);
 }
 
 /*
@@ -595,10 +399,8 @@ void bdi_unregister(struct backing_dev_info *bdi)
 	bdi_set_min_ratio(bdi, 0);
 	trace_writeback_bdi_unregister(bdi);
 	bdi_prune_sb(bdi);
-	del_timer_sync(&bdi->wb.wakeup_timer);
 
-	if (!bdi_cap_flush_forker(bdi))
-		bdi_wb_shutdown(bdi);
+	bdi_wb_shutdown(bdi);
 	bdi_debug_unregister(bdi);
 
 	spin_lock_bh(&bdi->wb_lock);
@@ -620,7 +422,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
 	INIT_LIST_HEAD(&wb->b_io);
 	INIT_LIST_HEAD(&wb->b_more_io);
 	spin_lock_init(&wb->list_lock);
-	setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi);
+	INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn);
 }
 
 /*
@@ -693,12 +495,11 @@ void bdi_destroy(struct backing_dev_info *bdi)
 	bdi_unregister(bdi);
 
 	/*
-	 * If bdi_unregister() had already been called earlier, the
-	 * wakeup_timer could still be armed because bdi_prune_sb()
-	 * can race with the bdi_wakeup_thread_delayed() calls from
-	 * __mark_inode_dirty().
+	 * If bdi_unregister() had already been called earlier, the dwork
+	 * could still be pending because bdi_prune_sb() can race with the
+	 * bdi_wakeup_thread_delayed() calls from __mark_inode_dirty().
 	 */
-	del_timer_sync(&bdi->wb.wakeup_timer);
+	cancel_delayed_work_sync(&bdi->wb.dwork);
 
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
 		percpu_counter_destroy(&bdi->bdi_stat[i]);
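
Taken together, the mm/backing-dev.c hunks reduce the bdi threading model to a small lifecycle: allocate one WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM workqueue at boot, INIT_DELAYED_WORK() each wb, kick or delay it with mod_delayed_work(), and tear it down by draining then cancelling. A compressed, hypothetical sketch of that lifecycle (sketch_* names are illustrative, not the kernel's):

/* Hypothetical end-to-end sketch of the workqueue lifecycle this patch
 * gives each bdi: setup, delayed wakeup, and drain-then-cancel shutdown. */
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

static struct workqueue_struct *sketch_wq;	/* stands in for bdi_wq */
static struct delayed_work sketch_dwork;	/* stands in for wb->dwork */

static void sketch_workfn(struct work_struct *work)
{
	pr_info("sketch: flushing\n");
}

static int __init sketch_init(void)
{
	/* Same flags default_bdi_init() passes to alloc_workqueue(). */
	sketch_wq = alloc_workqueue("sketch_writeback",
				    WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND,
				    0);
	if (!sketch_wq)
		return -ENOMEM;

	INIT_DELAYED_WORK(&sketch_dwork, sketch_workfn);

	/* Delayed wakeup, like bdi_wakeup_thread_delayed(). */
	mod_delayed_work(sketch_wq, &sketch_dwork, msecs_to_jiffies(500));
	return 0;
}

static void __exit sketch_exit(void)
{
	/* Shutdown order mirrors bdi_wb_shutdown(): run anything pending
	 * now, wait for it, then make sure nothing is left behind. */
	mod_delayed_work(sketch_wq, &sketch_dwork, 0);
	flush_delayed_work(&sketch_dwork);
	cancel_delayed_work_sync(&sketch_dwork);
	destroy_workqueue(sketch_wq);
}

module_init(sketch_init);
module_exit(sketch_exit);
MODULE_LICENSE("GPL");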