 fs/fs-writeback.c                | 102
 include/linux/backing-dev.h      |  15
 include/trace/events/writeback.h |   5
 mm/backing-dev.c                 | 255
 4 files changed, 65 insertions(+), 312 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 21f46fb3a101..8067d3719e94 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -22,7 +22,6 @@
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/kthread.h>
-#include <linux/freezer.h>
 #include <linux/writeback.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
@@ -88,20 +87,6 @@ static inline struct inode *wb_inode(struct list_head *head)
 #define CREATE_TRACE_POINTS
 #include <trace/events/writeback.h>
 
-/* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */
-static void bdi_wakeup_flusher(struct backing_dev_info *bdi)
-{
-	if (bdi->wb.task) {
-		wake_up_process(bdi->wb.task);
-	} else {
-		/*
-		 * The bdi thread isn't there, wake up the forker thread which
-		 * will create and run it.
-		 */
-		wake_up_process(default_backing_dev_info.wb.task);
-	}
-}
-
 static void bdi_queue_work(struct backing_dev_info *bdi,
 			   struct wb_writeback_work *work)
 {
@@ -109,10 +94,9 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
 
 	spin_lock_bh(&bdi->wb_lock);
 	list_add_tail(&work->list, &bdi->work_list);
-	if (!bdi->wb.task)
-		trace_writeback_nothread(bdi, work);
-	bdi_wakeup_flusher(bdi);
 	spin_unlock_bh(&bdi->wb_lock);
+
+	mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
 }
 
 static void
@@ -127,10 +111,8 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 	 */
 	work = kzalloc(sizeof(*work), GFP_ATOMIC);
 	if (!work) {
-		if (bdi->wb.task) {
-			trace_writeback_nowork(bdi);
-			wake_up_process(bdi->wb.task);
-		}
+		trace_writeback_nowork(bdi);
+		mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
 		return;
 	}
 
@@ -177,9 +159,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi)
 	 * writeback as soon as there is no other work to do.
 	 */
 	trace_writeback_wake_background(bdi);
-	spin_lock_bh(&bdi->wb_lock);
-	bdi_wakeup_flusher(bdi);
-	spin_unlock_bh(&bdi->wb_lock);
+	mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
 }
 
 /*
@@ -1020,66 +1000,48 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 
 /*
  * Handle writeback of dirty data for the device backed by this bdi. Also
- * wakes up periodically and does kupdated style flushing.
+ * reschedules periodically and does kupdated style flushing.
  */
-int bdi_writeback_thread(void *data)
+void bdi_writeback_workfn(struct work_struct *work)
 {
-	struct bdi_writeback *wb = data;
+	struct bdi_writeback *wb = container_of(to_delayed_work(work),
+						struct bdi_writeback, dwork);
 	struct backing_dev_info *bdi = wb->bdi;
 	long pages_written;
 
 	current->flags |= PF_SWAPWRITE;
-	set_freezable();
-	wb->last_active = jiffies;
-
-	/*
-	 * Our parent may run at a different priority, just set us to normal
-	 */
-	set_user_nice(current, 0);
-
-	trace_writeback_thread_start(bdi);
 
-	while (!kthread_freezable_should_stop(NULL)) {
+	if (likely(!current_is_workqueue_rescuer() ||
+		   list_empty(&bdi->bdi_list))) {
 		/*
-		 * Remove own delayed wake-up timer, since we are already awake
-		 * and we'll take care of the periodic write-back.
+		 * The normal path. Keep writing back @bdi until its
+		 * work_list is empty. Note that this path is also taken
+		 * if @bdi is shutting down even when we're running off the
+		 * rescuer as work_list needs to be drained.
 		 */
-		del_timer(&wb->wakeup_timer);
-
-		pages_written = wb_do_writeback(wb, 0);
-
+		do {
+			pages_written = wb_do_writeback(wb, 0);
+			trace_writeback_pages_written(pages_written);
+		} while (!list_empty(&bdi->work_list));
+	} else {
+		/*
+		 * bdi_wq can't get enough workers and we're running off
+		 * the emergency worker. Don't hog it. Hopefully, 1024 is
+		 * enough for efficient IO.
+		 */
+		pages_written = writeback_inodes_wb(&bdi->wb, 1024,
+						    WB_REASON_FORKER_THREAD);
 		trace_writeback_pages_written(pages_written);
-
-		if (pages_written)
-			wb->last_active = jiffies;
-
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (!list_empty(&bdi->work_list) || kthread_should_stop()) {
-			__set_current_state(TASK_RUNNING);
-			continue;
-		}
-
-		if (wb_has_dirty_io(wb) && dirty_writeback_interval)
-			schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
-		else {
-			/*
-			 * We have nothing to do, so can go sleep without any
-			 * timeout and save power. When a work is queued or
-			 * something is made dirty - we will be woken up.
-			 */
-			schedule();
-		}
 	}
 
-	/* Flush any work that raced with us exiting */
-	if (!list_empty(&bdi->work_list))
-		wb_do_writeback(wb, 1);
+	if (!list_empty(&bdi->work_list) ||
+	    (wb_has_dirty_io(wb) && dirty_writeback_interval))
+		queue_delayed_work(bdi_wq, &wb->dwork,
+				   msecs_to_jiffies(dirty_writeback_interval * 10));
 
-	trace_writeback_thread_stop(bdi);
-	return 0;
+	current->flags &= ~PF_SWAPWRITE;
 }
 
-
 /*
  * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
  * the whole world.
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index a5ef27f5411a..c3881553f7d1 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -18,6 +18,7 @@
 #include <linux/writeback.h>
 #include <linux/atomic.h>
 #include <linux/sysctl.h>
+#include <linux/workqueue.h>
 
 struct page;
 struct device;
@@ -27,7 +28,6 @@ struct dentry;
  * Bits in backing_dev_info.state
  */
 enum bdi_state {
-	BDI_pending,		/* On its way to being activated */
 	BDI_wb_alloc,		/* Default embedded wb allocated */
 	BDI_async_congested,	/* The async (write) queue is getting full */
 	BDI_sync_congested,	/* The sync queue is getting full */
@@ -53,10 +53,8 @@ struct bdi_writeback {
 	unsigned int nr;
 
 	unsigned long last_old_flush;	/* last old data flush */
-	unsigned long last_active;	/* last time bdi thread was active */
 
-	struct task_struct *task;	/* writeback thread */
-	struct timer_list wakeup_timer; /* used for delayed bdi thread wakeup */
+	struct delayed_work dwork;	/* work item used for writeback */
 	struct list_head b_dirty;	/* dirty inodes */
 	struct list_head b_io;		/* parked for writeback */
 	struct list_head b_more_io;	/* parked for more writeback */
@@ -123,7 +121,7 @@ int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
 void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 			enum wb_reason reason);
 void bdi_start_background_writeback(struct backing_dev_info *bdi);
-int bdi_writeback_thread(void *data);
+void bdi_writeback_workfn(struct work_struct *work);
 int bdi_has_dirty_io(struct backing_dev_info *bdi);
 void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi);
 void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2);
@@ -131,6 +129,8 @@ void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2);
 extern spinlock_t bdi_lock;
 extern struct list_head bdi_list;
 
+extern struct workqueue_struct *bdi_wq;
+
 static inline int wb_has_dirty_io(struct bdi_writeback *wb)
 {
 	return !list_empty(&wb->b_dirty) ||
@@ -335,11 +335,6 @@ static inline bool bdi_cap_swap_backed(struct backing_dev_info *bdi)
 	return bdi->capabilities & BDI_CAP_SWAP_BACKED;
 }
 
-static inline bool bdi_cap_flush_forker(struct backing_dev_info *bdi)
-{
-	return bdi == &default_backing_dev_info;
-}
-
 static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
 {
 	return bdi_cap_writeback_dirty(mapping->backing_dev_info);
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 6a16fd2e70ed..464ea82e10db 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -183,7 +183,6 @@ DECLARE_EVENT_CLASS(writeback_work_class,
 DEFINE_EVENT(writeback_work_class, name, \
 	TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), \
 	TP_ARGS(bdi, work))
-DEFINE_WRITEBACK_WORK_EVENT(writeback_nothread);
 DEFINE_WRITEBACK_WORK_EVENT(writeback_queue);
 DEFINE_WRITEBACK_WORK_EVENT(writeback_exec);
 DEFINE_WRITEBACK_WORK_EVENT(writeback_start);
@@ -222,12 +221,8 @@ DEFINE_EVENT(writeback_class, name, \
 
 DEFINE_WRITEBACK_EVENT(writeback_nowork);
 DEFINE_WRITEBACK_EVENT(writeback_wake_background);
-DEFINE_WRITEBACK_EVENT(writeback_wake_thread);
-DEFINE_WRITEBACK_EVENT(writeback_wake_forker_thread);
 DEFINE_WRITEBACK_EVENT(writeback_bdi_register);
 DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister);
-DEFINE_WRITEBACK_EVENT(writeback_thread_start);
-DEFINE_WRITEBACK_EVENT(writeback_thread_stop);
 
 DECLARE_EVENT_CLASS(wbc_class,
 	TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi),
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 657569b3fcf6..2857d4f6bca4 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -37,6 +37,9 @@ static struct class *bdi_class;
 DEFINE_SPINLOCK(bdi_lock);
 LIST_HEAD(bdi_list);
 
+/* bdi_wq serves all asynchronous writeback tasks */
+struct workqueue_struct *bdi_wq;
+
 void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)
 {
 	if (wb1 < wb2) {
@@ -255,6 +258,11 @@ static int __init default_bdi_init(void)
 {
 	int err;
 
+	bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE |
+					      WQ_UNBOUND, 0);
+	if (!bdi_wq)
+		return -ENOMEM;
+
 	err = bdi_init(&default_backing_dev_info);
 	if (!err)
 		bdi_register(&default_backing_dev_info, NULL, "default");
@@ -269,26 +277,6 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)
 	return wb_has_dirty_io(&bdi->wb);
 }
 
-static void wakeup_timer_fn(unsigned long data)
-{
-	struct backing_dev_info *bdi = (struct backing_dev_info *)data;
-
-	spin_lock_bh(&bdi->wb_lock);
-	if (bdi->wb.task) {
-		trace_writeback_wake_thread(bdi);
-		wake_up_process(bdi->wb.task);
-	} else if (bdi->dev) {
-		/*
-		 * When bdi tasks are inactive for long time, they are killed.
-		 * In this case we have to wake-up the forker thread which
-		 * should create and run the bdi thread.
-		 */
-		trace_writeback_wake_forker_thread(bdi);
-		wake_up_process(default_backing_dev_info.wb.task);
-	}
-	spin_unlock_bh(&bdi->wb_lock);
-}
-
 /*
  * This function is used when the first inode for this bdi is marked dirty. It
  * wakes-up the corresponding bdi thread which should then take care of the
@@ -305,176 +293,7 @@ void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)
 	unsigned long timeout;
 
 	timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
-	mod_timer(&bdi->wb.wakeup_timer, jiffies + timeout);
-}
-
-/*
- * Calculate the longest interval (jiffies) bdi threads are allowed to be
- * inactive.
- */
-static unsigned long bdi_longest_inactive(void)
-{
-	unsigned long interval;
-
-	interval = msecs_to_jiffies(dirty_writeback_interval * 10);
-	return max(5UL * 60 * HZ, interval);
-}
-
-/*
- * Clear pending bit and wakeup anybody waiting for flusher thread creation or
- * shutdown
- */
-static void bdi_clear_pending(struct backing_dev_info *bdi)
-{
-	clear_bit(BDI_pending, &bdi->state);
-	smp_mb__after_clear_bit();
-	wake_up_bit(&bdi->state, BDI_pending);
-}
-
-static int bdi_forker_thread(void *ptr)
-{
-	struct bdi_writeback *me = ptr;
-
-	current->flags |= PF_SWAPWRITE;
-	set_freezable();
-
-	/*
-	 * Our parent may run at a different priority, just set us to normal
-	 */
-	set_user_nice(current, 0);
-
-	for (;;) {
-		struct task_struct *task = NULL;
-		struct backing_dev_info *bdi;
-		enum {
-			NO_ACTION,   /* Nothing to do */
-			FORK_THREAD, /* Fork bdi thread */
-			KILL_THREAD, /* Kill inactive bdi thread */
-		} action = NO_ACTION;
-
-		/*
-		 * Temporary measure, we want to make sure we don't see
-		 * dirty data on the default backing_dev_info
-		 */
-		if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) {
-			del_timer(&me->wakeup_timer);
-			wb_do_writeback(me, 0);
-		}
-
-		spin_lock_bh(&bdi_lock);
-		/*
-		 * In the following loop we are going to check whether we have
-		 * some work to do without any synchronization with tasks
-		 * waking us up to do work for them. Set the task state here
-		 * so that we don't miss wakeups after verifying conditions.
-		 */
-		set_current_state(TASK_INTERRUPTIBLE);
-
-		list_for_each_entry(bdi, &bdi_list, bdi_list) {
-			bool have_dirty_io;
-
-			if (!bdi_cap_writeback_dirty(bdi) ||
-			     bdi_cap_flush_forker(bdi))
-				continue;
-
-			WARN(!test_bit(BDI_registered, &bdi->state),
-			     "bdi %p/%s is not registered!\n", bdi, bdi->name);
-
-			have_dirty_io = !list_empty(&bdi->work_list) ||
-					wb_has_dirty_io(&bdi->wb);
-
-			/*
-			 * If the bdi has work to do, but the thread does not
-			 * exist - create it.
-			 */
-			if (!bdi->wb.task && have_dirty_io) {
-				/*
-				 * Set the pending bit - if someone will try to
-				 * unregister this bdi - it'll wait on this bit.
-				 */
-				set_bit(BDI_pending, &bdi->state);
-				action = FORK_THREAD;
-				break;
-			}
-
-			spin_lock(&bdi->wb_lock);
-
-			/*
-			 * If there is no work to do and the bdi thread was
-			 * inactive long enough - kill it. The wb_lock is taken
-			 * to make sure no-one adds more work to this bdi and
-			 * wakes the bdi thread up.
-			 */
-			if (bdi->wb.task && !have_dirty_io &&
-			    time_after(jiffies, bdi->wb.last_active +
-						bdi_longest_inactive())) {
-				task = bdi->wb.task;
-				bdi->wb.task = NULL;
-				spin_unlock(&bdi->wb_lock);
-				set_bit(BDI_pending, &bdi->state);
-				action = KILL_THREAD;
-				break;
-			}
-			spin_unlock(&bdi->wb_lock);
-		}
-		spin_unlock_bh(&bdi_lock);
-
-		/* Keep working if default bdi still has things to do */
-		if (!list_empty(&me->bdi->work_list))
-			__set_current_state(TASK_RUNNING);
-
-		switch (action) {
-		case FORK_THREAD:
-			__set_current_state(TASK_RUNNING);
-			task = kthread_create(bdi_writeback_thread, &bdi->wb,
-					      "flush-%s", dev_name(bdi->dev));
-			if (IS_ERR(task)) {
-				/*
-				 * If thread creation fails, force writeout of
-				 * the bdi from the thread. Hopefully 1024 is
-				 * large enough for efficient IO.
-				 */
-				writeback_inodes_wb(&bdi->wb, 1024,
-						    WB_REASON_FORKER_THREAD);
-			} else {
-				/*
-				 * The spinlock makes sure we do not lose
-				 * wake-ups when racing with 'bdi_queue_work()'.
-				 * And as soon as the bdi thread is visible, we
-				 * can start it.
-				 */
-				spin_lock_bh(&bdi->wb_lock);
-				bdi->wb.task = task;
-				spin_unlock_bh(&bdi->wb_lock);
-				wake_up_process(task);
-			}
-			bdi_clear_pending(bdi);
-			break;
-
-		case KILL_THREAD:
-			__set_current_state(TASK_RUNNING);
-			kthread_stop(task);
-			bdi_clear_pending(bdi);
-			break;
-
-		case NO_ACTION:
-			if (!wb_has_dirty_io(me) || !dirty_writeback_interval)
-				/*
-				 * There are no dirty data. The only thing we
-				 * should now care about is checking for
-				 * inactive bdi threads and killing them. Thus,
-				 * let's sleep for longer time, save energy and
-				 * be friendly for battery-driven devices.
-				 */
-				schedule_timeout(bdi_longest_inactive());
-			else
-				schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
-			try_to_freeze();
-			break;
-		}
-	}
-
-	return 0;
+	mod_delayed_work(bdi_wq, &bdi->wb.dwork, timeout);
 }
 
 /*
@@ -487,6 +306,9 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi)
 	spin_unlock_bh(&bdi_lock);
 
 	synchronize_rcu_expedited();
+
+	/* bdi_list is now unused, clear it to mark @bdi dying */
+	INIT_LIST_HEAD(&bdi->bdi_list);
 }
 
 int bdi_register(struct backing_dev_info *bdi, struct device *parent,
@@ -506,20 +328,6 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
 
 	bdi->dev = dev;
 
-	/*
-	 * Just start the forker thread for our default backing_dev_info,
-	 * and add other bdi's to the list. They will get a thread created
-	 * on-demand when they need it.
-	 */
-	if (bdi_cap_flush_forker(bdi)) {
-		struct bdi_writeback *wb = &bdi->wb;
-
-		wb->task = kthread_run(bdi_forker_thread, wb, "bdi-%s",
-				       dev_name(dev));
-		if (IS_ERR(wb->task))
-			return PTR_ERR(wb->task);
-	}
-
 	bdi_debug_register(bdi, dev_name(dev));
 	set_bit(BDI_registered, &bdi->state);
 
@@ -543,8 +351,6 @@ EXPORT_SYMBOL(bdi_register_dev);
  */
 static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 {
-	struct task_struct *task;
-
 	if (!bdi_cap_writeback_dirty(bdi))
 		return;
 
@@ -554,22 +360,20 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 	bdi_remove_from_list(bdi);
 
 	/*
-	 * If setup is pending, wait for that to complete first
+	 * Drain work list and shutdown the delayed_work. At this point,
+	 * @bdi->bdi_list is empty telling bdi_writeback_workfn() that @bdi
+	 * is dying and its work_list needs to be drained no matter what.
 	 */
-	wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
-		    TASK_UNINTERRUPTIBLE);
+	mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
+	flush_delayed_work(&bdi->wb.dwork);
+	WARN_ON(!list_empty(&bdi->work_list));
 
 	/*
-	 * Finally, kill the kernel thread. We don't need to be RCU
-	 * safe anymore, since the bdi is gone from visibility.
+	 * This shouldn't be necessary unless @bdi for some reason has
+	 * unflushed dirty IO after work_list is drained. Do it anyway
+	 * just in case.
 	 */
-	spin_lock_bh(&bdi->wb_lock);
-	task = bdi->wb.task;
-	bdi->wb.task = NULL;
-	spin_unlock_bh(&bdi->wb_lock);
-
-	if (task)
-		kthread_stop(task);
+	cancel_delayed_work_sync(&bdi->wb.dwork);
 }
 
 /*
@@ -595,10 +399,8 @@ void bdi_unregister(struct backing_dev_info *bdi)
 		bdi_set_min_ratio(bdi, 0);
 		trace_writeback_bdi_unregister(bdi);
 		bdi_prune_sb(bdi);
-		del_timer_sync(&bdi->wb.wakeup_timer);
 
-		if (!bdi_cap_flush_forker(bdi))
-			bdi_wb_shutdown(bdi);
+		bdi_wb_shutdown(bdi);
 		bdi_debug_unregister(bdi);
 
 		spin_lock_bh(&bdi->wb_lock);
@@ -620,7 +422,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
 	INIT_LIST_HEAD(&wb->b_io);
 	INIT_LIST_HEAD(&wb->b_more_io);
 	spin_lock_init(&wb->list_lock);
-	setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi);
+	INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn);
 }
 
 /*
@@ -693,12 +495,11 @@ void bdi_destroy(struct backing_dev_info *bdi)
 	bdi_unregister(bdi);
 
 	/*
-	 * If bdi_unregister() had already been called earlier, the
-	 * wakeup_timer could still be armed because bdi_prune_sb()
-	 * can race with the bdi_wakeup_thread_delayed() calls from
-	 * __mark_inode_dirty().
+	 * If bdi_unregister() had already been called earlier, the dwork
+	 * could still be pending because bdi_prune_sb() can race with the
+	 * bdi_wakeup_thread_delayed() calls from __mark_inode_dirty().
 	 */
-	del_timer_sync(&bdi->wb.wakeup_timer);
+	cancel_delayed_work_sync(&bdi->wb.dwork);
 
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
 		percpu_counter_destroy(&bdi->bdi_stat[i]);
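
The conversion above follows the standard delayed_work pattern: a work item that drains its queue, re-arms itself for periodic (kupdated-style) flushing, and is torn down with cancel_delayed_work_sync(). Below is a minimal, self-contained sketch of that pattern, illustrative only and not part of this patch; the demo_* names are hypothetical stand-ins for bdi_wq, wb->dwork and bdi_writeback_workfn().

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *demo_wq;	/* plays the role of bdi_wq */
static struct delayed_work demo_dwork;		/* plays the role of wb->dwork */

static void demo_workfn(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);

	pr_info("flush pass\n");		/* stand-in for wb_do_writeback() */

	/* re-arm for periodic flushing */
	queue_delayed_work(demo_wq, dwork, msecs_to_jiffies(5000));
}

static int __init demo_init(void)
{
	demo_wq = alloc_workqueue("demo_writeback",
				  WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND, 0);
	if (!demo_wq)
		return -ENOMEM;

	INIT_DELAYED_WORK(&demo_dwork, demo_workfn);
	mod_delayed_work(demo_wq, &demo_dwork, 0);	/* kick immediately */
	return 0;
}

static void __exit demo_exit(void)
{
	/* same shutdown order as bdi_wb_shutdown()/bdi_destroy() above */
	cancel_delayed_work_sync(&demo_dwork);
	destroy_workqueue(demo_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");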