Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r-- | fs/fs-writeback.c | 641 |
1 file changed, 279 insertions, 362 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 4b37f7cea4dd..2f76c4a081a2 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -26,62 +26,38 @@ | |||
26 | #include <linux/blkdev.h> | 26 | #include <linux/blkdev.h> |
27 | #include <linux/backing-dev.h> | 27 | #include <linux/backing-dev.h> |
28 | #include <linux/buffer_head.h> | 28 | #include <linux/buffer_head.h> |
29 | #include <linux/tracepoint.h> | ||
29 | #include "internal.h" | 30 | #include "internal.h" |
30 | 31 | ||
31 | #define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info) | ||
32 | |||
33 | /* | ||
34 | * We don't actually have pdflush, but this one is exported through /proc... | ||
35 | */ | ||
36 | int nr_pdflush_threads; | ||
37 | |||
38 | /* | 32 | /* |
39 | * Passed into wb_writeback(), essentially a subset of writeback_control | 33 | * Passed into wb_writeback(), essentially a subset of writeback_control |
40 | */ | 34 | */ |
41 | struct wb_writeback_args { | 35 | struct wb_writeback_work { |
42 | long nr_pages; | 36 | long nr_pages; |
43 | struct super_block *sb; | 37 | struct super_block *sb; |
44 | enum writeback_sync_modes sync_mode; | 38 | enum writeback_sync_modes sync_mode; |
45 | int for_kupdate:1; | 39 | unsigned int for_kupdate:1; |
46 | int range_cyclic:1; | 40 | unsigned int range_cyclic:1; |
47 | int for_background:1; | 41 | unsigned int for_background:1; |
48 | }; | ||
49 | 42 | ||
50 | /* | ||
51 | * Work items for the bdi_writeback threads | ||
52 | */ | ||
53 | struct bdi_work { | ||
54 | struct list_head list; /* pending work list */ | 43 | struct list_head list; /* pending work list */ |
55 | struct rcu_head rcu_head; /* for RCU free/clear of work */ | 44 | struct completion *done; /* set if the caller waits */ |
56 | |||
57 | unsigned long seen; /* threads that have seen this work */ | ||
58 | atomic_t pending; /* number of threads still to do work */ | ||
59 | |||
60 | struct wb_writeback_args args; /* writeback arguments */ | ||
61 | |||
62 | unsigned long state; /* flag bits, see WS_* */ | ||
63 | }; | ||
64 | |||
65 | enum { | ||
66 | WS_USED_B = 0, | ||
67 | WS_ONSTACK_B, | ||
68 | }; | 45 | }; |
69 | 46 | ||
70 | #define WS_USED (1 << WS_USED_B) | 47 | /* |
71 | #define WS_ONSTACK (1 << WS_ONSTACK_B) | 48 | * Include the creation of the trace points after defining the |
49 | * wb_writeback_work structure so that the definition remains local to this | ||
50 | * file. | ||
51 | */ | ||
52 | #define CREATE_TRACE_POINTS | ||
53 | #include <trace/events/writeback.h> | ||
72 | 54 | ||
73 | static inline bool bdi_work_on_stack(struct bdi_work *work) | 55 | #define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info) |
74 | { | ||
75 | return test_bit(WS_ONSTACK_B, &work->state); | ||
76 | } | ||
77 | 56 | ||
78 | static inline void bdi_work_init(struct bdi_work *work, | 57 | /* |
79 | struct wb_writeback_args *args) | 58 | * We don't actually have pdflush, but this one is exported through /proc... |
80 | { | 59 | */ |
81 | INIT_RCU_HEAD(&work->rcu_head); | 60 | int nr_pdflush_threads; |
82 | work->args = *args; | ||
83 | work->state = WS_USED; | ||
84 | } | ||
85 | 61 | ||
86 | /** | 62 | /** |
87 | * writeback_in_progress - determine whether there is writeback in progress | 63 | * writeback_in_progress - determine whether there is writeback in progress |
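
The new wb_writeback_work replaces the RCU-managed bdi_work, its per-thread seen bits and WS_* state word with plain ownership: fire-and-forget items are allocated and later freed by the flusher thread, while callers that must wait embed a completion. A minimal sketch of the synchronous pattern, matching what writeback_inodes_sb() and sync_inodes_sb() do later in this patch (example_sync_writeback is a hypothetical name):

	static void example_sync_writeback(struct backing_dev_info *bdi,
					   struct super_block *sb)
	{
		DECLARE_COMPLETION_ONSTACK(done);
		struct wb_writeback_work work = {
			.sb		= sb,
			.sync_mode	= WB_SYNC_ALL,
			.nr_pages	= LONG_MAX,
			.done		= &done,	/* set iff the caller waits */
		};

		bdi_queue_work(bdi, &work);	/* hand off to the flusher thread */
		wait_for_completion(&done);	/* item is completed, never freed */
	}
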
@@ -95,183 +71,81 @@ int writeback_in_progress(struct backing_dev_info *bdi) | |||
95 | return !list_empty(&bdi->work_list); | 71 | return !list_empty(&bdi->work_list); |
96 | } | 72 | } |
97 | 73 | ||
98 | static void bdi_work_clear(struct bdi_work *work) | 74 | static void bdi_queue_work(struct backing_dev_info *bdi, |
99 | { | 75 | struct wb_writeback_work *work) |
100 | clear_bit(WS_USED_B, &work->state); | ||
101 | smp_mb__after_clear_bit(); | ||
102 | /* | ||
103 | * work can have disappeared at this point. bit waitq functions | ||
104 | * should be able to tolerate this, provided bdi_sched_wait does | ||
105 | * not dereference it's pointer argument. | ||
106 | */ | ||
107 | wake_up_bit(&work->state, WS_USED_B); | ||
108 | } | ||
109 | |||
110 | static void bdi_work_free(struct rcu_head *head) | ||
111 | { | ||
112 | struct bdi_work *work = container_of(head, struct bdi_work, rcu_head); | ||
113 | |||
114 | if (!bdi_work_on_stack(work)) | ||
115 | kfree(work); | ||
116 | else | ||
117 | bdi_work_clear(work); | ||
118 | } | ||
119 | |||
120 | static void wb_work_complete(struct bdi_work *work) | ||
121 | { | ||
122 | const enum writeback_sync_modes sync_mode = work->args.sync_mode; | ||
123 | int onstack = bdi_work_on_stack(work); | ||
124 | |||
125 | /* | ||
126 | * For allocated work, we can clear the done/seen bit right here. | ||
127 | * For on-stack work, we need to postpone both the clear and free | ||
128 | * to after the RCU grace period, since the stack could be invalidated | ||
129 | * as soon as bdi_work_clear() has done the wakeup. | ||
130 | */ | ||
131 | if (!onstack) | ||
132 | bdi_work_clear(work); | ||
133 | if (sync_mode == WB_SYNC_NONE || onstack) | ||
134 | call_rcu(&work->rcu_head, bdi_work_free); | ||
135 | } | ||
136 | |||
137 | static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work) | ||
138 | { | ||
139 | /* | ||
140 | * The caller has retrieved the work arguments from this work, | ||
141 | * drop our reference. If this is the last ref, delete and free it | ||
142 | */ | ||
143 | if (atomic_dec_and_test(&work->pending)) { | ||
144 | struct backing_dev_info *bdi = wb->bdi; | ||
145 | |||
146 | spin_lock(&bdi->wb_lock); | ||
147 | list_del_rcu(&work->list); | ||
148 | spin_unlock(&bdi->wb_lock); | ||
149 | |||
150 | wb_work_complete(work); | ||
151 | } | ||
152 | } | ||
153 | |||
154 | static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work) | ||
155 | { | 76 | { |
156 | work->seen = bdi->wb_mask; | 77 | trace_writeback_queue(bdi, work); |
157 | BUG_ON(!work->seen); | ||
158 | atomic_set(&work->pending, bdi->wb_cnt); | ||
159 | BUG_ON(!bdi->wb_cnt); | ||
160 | 78 | ||
161 | /* | 79 | spin_lock_bh(&bdi->wb_lock); |
162 | * list_add_tail_rcu() contains the necessary barriers to | 80 | list_add_tail(&work->list, &bdi->work_list); |
163 | * make sure the above stores are seen before the item is | 81 | if (bdi->wb.task) { |
164 | * noticed on the list | 82 | wake_up_process(bdi->wb.task); |
165 | */ | 83 | } else { |
166 | spin_lock(&bdi->wb_lock); | 84 | /* |
167 | list_add_tail_rcu(&work->list, &bdi->work_list); | 85 | * The bdi thread isn't there, wake up the forker thread which |
168 | spin_unlock(&bdi->wb_lock); | 86 | * will create and run it. |
169 | 87 | */ | |
170 | /* | 88 | trace_writeback_nothread(bdi, work); |
171 | * If the default thread isn't there, make sure we add it. When | ||
172 | * it gets created and wakes up, we'll run this work. | ||
173 | */ | ||
174 | if (unlikely(list_empty_careful(&bdi->wb_list))) | ||
175 | wake_up_process(default_backing_dev_info.wb.task); | 89 | wake_up_process(default_backing_dev_info.wb.task); |
176 | else { | ||
177 | struct bdi_writeback *wb = &bdi->wb; | ||
178 | |||
179 | if (wb->task) | ||
180 | wake_up_process(wb->task); | ||
181 | } | 90 | } |
91 | spin_unlock_bh(&bdi->wb_lock); | ||
182 | } | 92 | } |
183 | 93 | ||
184 | /* | 94 | static void |
185 | * Used for on-stack allocated work items. The caller needs to wait until | 95 | __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, |
186 | * the wb threads have acked the work before it's safe to continue. | 96 | bool range_cyclic, bool for_background) |
187 | */ | ||
188 | static void bdi_wait_on_work_clear(struct bdi_work *work) | ||
189 | { | ||
190 | wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait, | ||
191 | TASK_UNINTERRUPTIBLE); | ||
192 | } | ||
193 | |||
194 | static void bdi_alloc_queue_work(struct backing_dev_info *bdi, | ||
195 | struct wb_writeback_args *args) | ||
196 | { | 97 | { |
197 | struct bdi_work *work; | 98 | struct wb_writeback_work *work; |
198 | 99 | ||
199 | /* | 100 | /* |
200 | * This is WB_SYNC_NONE writeback, so if allocation fails just | 101 | * This is WB_SYNC_NONE writeback, so if allocation fails just |
201 | * wakeup the thread for old dirty data writeback | 102 | * wakeup the thread for old dirty data writeback |
202 | */ | 103 | */ |
203 | work = kmalloc(sizeof(*work), GFP_ATOMIC); | 104 | work = kzalloc(sizeof(*work), GFP_ATOMIC); |
204 | if (work) { | 105 | if (!work) { |
205 | bdi_work_init(work, args); | 106 | if (bdi->wb.task) { |
206 | bdi_queue_work(bdi, work); | 107 | trace_writeback_nowork(bdi); |
207 | } else { | 108 | wake_up_process(bdi->wb.task); |
208 | struct bdi_writeback *wb = &bdi->wb; | 109 | } |
209 | 110 | return; | |
210 | if (wb->task) | ||
211 | wake_up_process(wb->task); | ||
212 | } | 111 | } |
112 | |||
113 | work->sync_mode = WB_SYNC_NONE; | ||
114 | work->nr_pages = nr_pages; | ||
115 | work->range_cyclic = range_cyclic; | ||
116 | work->for_background = for_background; | ||
117 | |||
118 | bdi_queue_work(bdi, work); | ||
213 | } | 119 | } |
214 | 120 | ||
215 | /** | 121 | /** |
216 | * bdi_sync_writeback - start and wait for writeback | 122 | * bdi_start_writeback - start writeback |
217 | * @bdi: the backing device to write from | 123 | * @bdi: the backing device to write from |
218 | * @sb: write inodes from this super_block | 124 | * @nr_pages: the number of pages to write |
219 | * | 125 | * |
220 | * Description: | 126 | * Description: |
221 | * This does WB_SYNC_ALL data integrity writeback and waits for the | 127 | * This does WB_SYNC_NONE opportunistic writeback. The IO is only |
222 | * IO to complete. Callers must hold the sb s_umount semaphore for | 128 | * started when this function returns, we make no guarentees on |
223 | * reading, to avoid having the super disappear before we are done. | 129 | * completion. Caller need not hold sb s_umount semaphore. |
130 | * | ||
224 | */ | 131 | */ |
225 | static void bdi_sync_writeback(struct backing_dev_info *bdi, | 132 | void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages) |
226 | struct super_block *sb) | ||
227 | { | 133 | { |
228 | struct wb_writeback_args args = { | 134 | __bdi_start_writeback(bdi, nr_pages, true, false); |
229 | .sb = sb, | ||
230 | .sync_mode = WB_SYNC_ALL, | ||
231 | .nr_pages = LONG_MAX, | ||
232 | .range_cyclic = 0, | ||
233 | }; | ||
234 | struct bdi_work work; | ||
235 | |||
236 | bdi_work_init(&work, &args); | ||
237 | work.state |= WS_ONSTACK; | ||
238 | |||
239 | bdi_queue_work(bdi, &work); | ||
240 | bdi_wait_on_work_clear(&work); | ||
241 | } | 135 | } |
242 | 136 | ||
243 | /** | 137 | /** |
244 | * bdi_start_writeback - start writeback | 138 | * bdi_start_background_writeback - start background writeback |
245 | * @bdi: the backing device to write from | 139 | * @bdi: the backing device to write from |
246 | * @sb: write inodes from this super_block | ||
247 | * @nr_pages: the number of pages to write | ||
248 | * | 140 | * |
249 | * Description: | 141 | * Description: |
250 | * This does WB_SYNC_NONE opportunistic writeback. The IO is only | 142 | * This does WB_SYNC_NONE background writeback. The IO is only |
251 | * started when this function returns, we make no guarantees on | 143 | * started when this function returns, we make no guarantees on |
252 | * completion. Caller need not hold sb s_umount semaphore. | 144 | * completion. Caller need not hold sb s_umount semaphore. |
253 | * | ||
254 | */ | 145 | */ |
255 | void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb, | 146 | void bdi_start_background_writeback(struct backing_dev_info *bdi) |
256 | long nr_pages) | ||
257 | { | 147 | { |
258 | struct wb_writeback_args args = { | 148 | __bdi_start_writeback(bdi, LONG_MAX, true, true); |
259 | .sb = sb, | ||
260 | .sync_mode = WB_SYNC_NONE, | ||
261 | .nr_pages = nr_pages, | ||
262 | .range_cyclic = 1, | ||
263 | }; | ||
264 | |||
265 | /* | ||
266 | * We treat @nr_pages=0 as the special case to do background writeback, | ||
267 | * ie. to sync pages until the background dirty threshold is reached. | ||
268 | */ | ||
269 | if (!nr_pages) { | ||
270 | args.nr_pages = LONG_MAX; | ||
271 | args.for_background = 1; | ||
272 | } | ||
273 | |||
274 | bdi_alloc_queue_work(bdi, &args); | ||
275 | } | 149 | } |
276 | 150 | ||
277 | /* | 151 | /* |
@@ -398,11 +272,11 @@ static void inode_wait_for_writeback(struct inode *inode) | |||
398 | wait_queue_head_t *wqh; | 272 | wait_queue_head_t *wqh; |
399 | 273 | ||
400 | wqh = bit_waitqueue(&inode->i_state, __I_SYNC); | 274 | wqh = bit_waitqueue(&inode->i_state, __I_SYNC); |
401 | do { | 275 | while (inode->i_state & I_SYNC) { |
402 | spin_unlock(&inode_lock); | 276 | spin_unlock(&inode_lock); |
403 | __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); | 277 | __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); |
404 | spin_lock(&inode_lock); | 278 | spin_lock(&inode_lock); |
405 | } while (inode->i_state & I_SYNC); | 279 | } |
406 | } | 280 | } |
407 | 281 | ||
408 | /* | 282 | /* |
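
The do/while to while conversion means the common case, I_SYNC already clear, no longer drops inode_lock for a pointless round trip through __wait_on_bit(). The wait loop pairs with the waker on the writeback side; a sketch of that side, assuming the inode_sync_complete() helper this file has carried since the I_SYNC split:

	static void inode_sync_complete(struct inode *inode)
	{
		/* make the ~I_SYNC store visible before waking waiters */
		smp_mb();
		wake_up_bit(&inode->i_state, __I_SYNC);
	}
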
@@ -452,11 +326,9 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
452 | 326 | ||
453 | BUG_ON(inode->i_state & I_SYNC); | 327 | BUG_ON(inode->i_state & I_SYNC); |
454 | 328 | ||
455 | /* Set I_SYNC, reset I_DIRTY */ | 329 | /* Set I_SYNC, reset I_DIRTY_PAGES */ |
456 | dirty = inode->i_state & I_DIRTY; | ||
457 | inode->i_state |= I_SYNC; | 330 | inode->i_state |= I_SYNC; |
458 | inode->i_state &= ~I_DIRTY; | 331 | inode->i_state &= ~I_DIRTY_PAGES; |
459 | |||
460 | spin_unlock(&inode_lock); | 332 | spin_unlock(&inode_lock); |
461 | 333 | ||
462 | ret = do_writepages(mapping, wbc); | 334 | ret = do_writepages(mapping, wbc); |
@@ -472,6 +344,15 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
472 | ret = err; | 344 | ret = err; |
473 | } | 345 | } |
474 | 346 | ||
347 | /* | ||
348 | * Some filesystems may redirty the inode during the writeback | ||
349 | * due to delalloc, clear dirty metadata flags right before | ||
350 | * write_inode() | ||
351 | */ | ||
352 | spin_lock(&inode_lock); | ||
353 | dirty = inode->i_state & I_DIRTY; | ||
354 | inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); | ||
355 | spin_unlock(&inode_lock); | ||
475 | /* Don't write the inode if only I_DIRTY_PAGES was set */ | 356 | /* Don't write the inode if only I_DIRTY_PAGES was set */ |
476 | if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { | 357 | if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { |
477 | int err = write_inode(inode, wbc); | 358 | int err = write_inode(inode, wbc); |
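
Sampling and clearing the metadata dirty bits after do_writepages() matters for delayed-allocation filesystems: block allocation during page writeback can mark the inode dirty again, and the old code, which latched I_DIRTY before writing pages, would miss that redirty and skip write_inode(). A hedged sketch of the behaviour being accommodated (example_writepages is illustrative, not a real ->writepages implementation):

	static int example_writepages(struct address_space *mapping,
				      struct writeback_control *wbc)
	{
		/* allocate blocks for delayed-allocated pages ... */
		/* ... which dirties inode metadata again: */
		mark_inode_dirty_sync(mapping->host);	/* sets I_DIRTY_SYNC */
		return 0;
	}
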
@@ -481,7 +362,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
481 | 362 | ||
482 | spin_lock(&inode_lock); | 363 | spin_lock(&inode_lock); |
483 | inode->i_state &= ~I_SYNC; | 364 | inode->i_state &= ~I_SYNC; |
484 | if (!(inode->i_state & (I_FREEING | I_CLEAR))) { | 365 | if (!(inode->i_state & I_FREEING)) { |
485 | if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) { | 366 | if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) { |
486 | /* | 367 | /* |
487 | * More pages get dirtied by a fast dirtier. | 368 | * More pages get dirtied by a fast dirtier. |
@@ -554,75 +435,69 @@ select_queue: | |||
554 | return ret; | 435 | return ret; |
555 | } | 436 | } |
556 | 437 | ||
557 | static void unpin_sb_for_writeback(struct super_block *sb) | ||
558 | { | ||
559 | up_read(&sb->s_umount); | ||
560 | put_super(sb); | ||
561 | } | ||
562 | |||
563 | enum sb_pin_state { | ||
564 | SB_PINNED, | ||
565 | SB_NOT_PINNED, | ||
566 | SB_PIN_FAILED | ||
567 | }; | ||
568 | |||
569 | /* | 438 | /* |
570 | * For WB_SYNC_NONE writeback, the caller does not have the sb pinned | 439 | * For background writeback the caller does not have the sb pinned |
571 | * before calling writeback. So make sure that we do pin it, so it doesn't | 440 | * before calling writeback. So make sure that we do pin it, so it doesn't |
572 | * go away while we are writing inodes from it. | 441 | * go away while we are writing inodes from it. |
573 | */ | 442 | */ |
574 | static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, | 443 | static bool pin_sb_for_writeback(struct super_block *sb) |
575 | struct super_block *sb) | ||
576 | { | 444 | { |
577 | /* | ||
578 | * Caller must already hold the ref for this | ||
579 | */ | ||
580 | if (wbc->sync_mode == WB_SYNC_ALL) { | ||
581 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | ||
582 | return SB_NOT_PINNED; | ||
583 | } | ||
584 | spin_lock(&sb_lock); | 445 | spin_lock(&sb_lock); |
446 | if (list_empty(&sb->s_instances)) { | ||
447 | spin_unlock(&sb_lock); | ||
448 | return false; | ||
449 | } | ||
450 | |||
585 | sb->s_count++; | 451 | sb->s_count++; |
452 | spin_unlock(&sb_lock); | ||
453 | |||
586 | if (down_read_trylock(&sb->s_umount)) { | 454 | if (down_read_trylock(&sb->s_umount)) { |
587 | if (sb->s_root) { | 455 | if (sb->s_root) |
588 | spin_unlock(&sb_lock); | 456 | return true; |
589 | return SB_PINNED; | ||
590 | } | ||
591 | /* | ||
592 | * umounted, drop rwsem again and fall through to failure | ||
593 | */ | ||
594 | up_read(&sb->s_umount); | 457 | up_read(&sb->s_umount); |
595 | } | 458 | } |
596 | sb->s_count--; | 459 | |
597 | spin_unlock(&sb_lock); | 460 | put_super(sb); |
598 | return SB_PIN_FAILED; | 461 | return false; |
599 | } | 462 | } |
600 | 463 | ||
601 | /* | 464 | /* |
602 | * Write a portion of b_io inodes which belong to @sb. | 465 | * Write a portion of b_io inodes which belong to @sb. |
603 | * If @wbc->sb != NULL, then find and write all such | 466 | * |
467 | * If @only_this_sb is true, then find and write all such | ||
604 | * inodes. Otherwise write only ones which go sequentially | 468 | * inodes. Otherwise write only ones which go sequentially |
605 | * in reverse order. | 469 | * in reverse order. |
470 | * | ||
606 | * Return 1, if the caller writeback routine should be | 471 | * Return 1, if the caller writeback routine should be |
607 | * interrupted. Otherwise return 0. | 472 | * interrupted. Otherwise return 0. |
608 | */ | 473 | */ |
609 | static int writeback_sb_inodes(struct super_block *sb, | 474 | static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, |
610 | struct bdi_writeback *wb, | 475 | struct writeback_control *wbc, bool only_this_sb) |
611 | struct writeback_control *wbc) | ||
612 | { | 476 | { |
613 | while (!list_empty(&wb->b_io)) { | 477 | while (!list_empty(&wb->b_io)) { |
614 | long pages_skipped; | 478 | long pages_skipped; |
615 | struct inode *inode = list_entry(wb->b_io.prev, | 479 | struct inode *inode = list_entry(wb->b_io.prev, |
616 | struct inode, i_list); | 480 | struct inode, i_list); |
617 | if (wbc->sb && sb != inode->i_sb) { | 481 | |
618 | /* super block given and doesn't | 482 | if (inode->i_sb != sb) { |
619 | match, skip this inode */ | 483 | if (only_this_sb) { |
620 | redirty_tail(inode); | 484 | /* |
621 | continue; | 485 | * We only want to write back data for this |
622 | } | 486 | * superblock, move all inodes not belonging |
623 | if (sb != inode->i_sb) | 487 | * to it back onto the dirty list. |
624 | /* finish with this superblock */ | 488 | */ |
489 | redirty_tail(inode); | ||
490 | continue; | ||
491 | } | ||
492 | |||
493 | /* | ||
494 | * The inode belongs to a different superblock. | ||
495 | * Bounce back to the caller to unpin this and | ||
496 | * pin the next superblock. | ||
497 | */ | ||
625 | return 0; | 498 | return 0; |
499 | } | ||
500 | |||
626 | if (inode->i_state & (I_NEW | I_WILL_FREE)) { | 501 | if (inode->i_state & (I_NEW | I_WILL_FREE)) { |
627 | requeue_io(inode); | 502 | requeue_io(inode); |
628 | continue; | 503 | continue; |
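
A successful pin_sb_for_writeback() now leaves the caller holding exactly a superblock reference plus s_umount for reading, which is what the generic drop_super() releases, so the open-coded unpin_sb_for_writeback() helper can go away. For reference, a sketch of that VFS helper (it lives in fs/super.c, not in this diff):

	void drop_super(struct super_block *sb)
	{
		up_read(&sb->s_umount);
		put_super(sb);
	}
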
@@ -634,7 +509,7 @@ static int writeback_sb_inodes(struct super_block *sb, | |||
634 | if (inode_dirtied_after(inode, wbc->wb_start)) | 509 | if (inode_dirtied_after(inode, wbc->wb_start)) |
635 | return 1; | 510 | return 1; |
636 | 511 | ||
637 | BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); | 512 | BUG_ON(inode->i_state & I_FREEING); |
638 | __iget(inode); | 513 | __iget(inode); |
639 | pages_skipped = wbc->pages_skipped; | 514 | pages_skipped = wbc->pages_skipped; |
640 | writeback_single_inode(inode, wbc); | 515 | writeback_single_inode(inode, wbc); |
@@ -660,12 +535,13 @@ static int writeback_sb_inodes(struct super_block *sb, | |||
660 | return 1; | 535 | return 1; |
661 | } | 536 | } |
662 | 537 | ||
663 | static void writeback_inodes_wb(struct bdi_writeback *wb, | 538 | void writeback_inodes_wb(struct bdi_writeback *wb, |
664 | struct writeback_control *wbc) | 539 | struct writeback_control *wbc) |
665 | { | 540 | { |
666 | int ret = 0; | 541 | int ret = 0; |
667 | 542 | ||
668 | wbc->wb_start = jiffies; /* livelock avoidance */ | 543 | if (!wbc->wb_start) |
544 | wbc->wb_start = jiffies; /* livelock avoidance */ | ||
669 | spin_lock(&inode_lock); | 545 | spin_lock(&inode_lock); |
670 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | 546 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) |
671 | queue_io(wb, wbc->older_than_this); | 547 | queue_io(wb, wbc->older_than_this); |
@@ -674,24 +550,14 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, | |||
674 | struct inode *inode = list_entry(wb->b_io.prev, | 550 | struct inode *inode = list_entry(wb->b_io.prev, |
675 | struct inode, i_list); | 551 | struct inode, i_list); |
676 | struct super_block *sb = inode->i_sb; | 552 | struct super_block *sb = inode->i_sb; |
677 | enum sb_pin_state state; | ||
678 | |||
679 | if (wbc->sb && sb != wbc->sb) { | ||
680 | /* super block given and doesn't | ||
681 | match, skip this inode */ | ||
682 | redirty_tail(inode); | ||
683 | continue; | ||
684 | } | ||
685 | state = pin_sb_for_writeback(wbc, sb); | ||
686 | 553 | ||
687 | if (state == SB_PIN_FAILED) { | 554 | if (!pin_sb_for_writeback(sb)) { |
688 | requeue_io(inode); | 555 | requeue_io(inode); |
689 | continue; | 556 | continue; |
690 | } | 557 | } |
691 | ret = writeback_sb_inodes(sb, wb, wbc); | 558 | ret = writeback_sb_inodes(sb, wb, wbc, false); |
559 | drop_super(sb); | ||
692 | 560 | ||
693 | if (state == SB_PINNED) | ||
694 | unpin_sb_for_writeback(sb); | ||
695 | if (ret) | 561 | if (ret) |
696 | break; | 562 | break; |
697 | } | 563 | } |
@@ -699,11 +565,16 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, | |||
699 | /* Leave any unwritten inodes on b_io */ | 565 | /* Leave any unwritten inodes on b_io */ |
700 | } | 566 | } |
701 | 567 | ||
702 | void writeback_inodes_wbc(struct writeback_control *wbc) | 568 | static void __writeback_inodes_sb(struct super_block *sb, |
569 | struct bdi_writeback *wb, struct writeback_control *wbc) | ||
703 | { | 570 | { |
704 | struct backing_dev_info *bdi = wbc->bdi; | 571 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
705 | 572 | ||
706 | writeback_inodes_wb(&bdi->wb, wbc); | 573 | spin_lock(&inode_lock); |
574 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | ||
575 | queue_io(wb, wbc->older_than_this); | ||
576 | writeback_sb_inodes(sb, wb, wbc, true); | ||
577 | spin_unlock(&inode_lock); | ||
707 | } | 578 | } |
708 | 579 | ||
709 | /* | 580 | /* |
@@ -741,16 +612,14 @@ static inline bool over_bground_thresh(void) | |||
741 | * all dirty pages if they are all attached to "old" mappings. | 612 | * all dirty pages if they are all attached to "old" mappings. |
742 | */ | 613 | */ |
743 | static long wb_writeback(struct bdi_writeback *wb, | 614 | static long wb_writeback(struct bdi_writeback *wb, |
744 | struct wb_writeback_args *args) | 615 | struct wb_writeback_work *work) |
745 | { | 616 | { |
746 | struct writeback_control wbc = { | 617 | struct writeback_control wbc = { |
747 | .bdi = wb->bdi, | 618 | .sync_mode = work->sync_mode, |
748 | .sb = args->sb, | ||
749 | .sync_mode = args->sync_mode, | ||
750 | .older_than_this = NULL, | 619 | .older_than_this = NULL, |
751 | .for_kupdate = args->for_kupdate, | 620 | .for_kupdate = work->for_kupdate, |
752 | .for_background = args->for_background, | 621 | .for_background = work->for_background, |
753 | .range_cyclic = args->range_cyclic, | 622 | .range_cyclic = work->range_cyclic, |
754 | }; | 623 | }; |
755 | unsigned long oldest_jif; | 624 | unsigned long oldest_jif; |
756 | long wrote = 0; | 625 | long wrote = 0; |
@@ -766,25 +635,33 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
766 | wbc.range_end = LLONG_MAX; | 635 | wbc.range_end = LLONG_MAX; |
767 | } | 636 | } |
768 | 637 | ||
638 | wbc.wb_start = jiffies; /* livelock avoidance */ | ||
769 | for (;;) { | 639 | for (;;) { |
770 | /* | 640 | /* |
771 | * Stop writeback when nr_pages has been consumed | 641 | * Stop writeback when nr_pages has been consumed |
772 | */ | 642 | */ |
773 | if (args->nr_pages <= 0) | 643 | if (work->nr_pages <= 0) |
774 | break; | 644 | break; |
775 | 645 | ||
776 | /* | 646 | /* |
777 | * For background writeout, stop when we are below the | 647 | * For background writeout, stop when we are below the |
778 | * background dirty threshold | 648 | * background dirty threshold |
779 | */ | 649 | */ |
780 | if (args->for_background && !over_bground_thresh()) | 650 | if (work->for_background && !over_bground_thresh()) |
781 | break; | 651 | break; |
782 | 652 | ||
783 | wbc.more_io = 0; | 653 | wbc.more_io = 0; |
784 | wbc.nr_to_write = MAX_WRITEBACK_PAGES; | 654 | wbc.nr_to_write = MAX_WRITEBACK_PAGES; |
785 | wbc.pages_skipped = 0; | 655 | wbc.pages_skipped = 0; |
786 | writeback_inodes_wb(wb, &wbc); | 656 | |
787 | args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; | 657 | trace_wbc_writeback_start(&wbc, wb->bdi); |
658 | if (work->sb) | ||
659 | __writeback_inodes_sb(work->sb, wb, &wbc); | ||
660 | else | ||
661 | writeback_inodes_wb(wb, &wbc); | ||
662 | trace_wbc_writeback_written(&wbc, wb->bdi); | ||
663 | |||
664 | work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; | ||
788 | wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; | 665 | wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; |
789 | 666 | ||
790 | /* | 667 | /* |
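
Each pass through the loop budgets MAX_WRITEBACK_PAGES and charges the work item only for what was actually consumed. A worked example, assuming MAX_WRITEBACK_PAGES is 1024:

	long budget = MAX_WRITEBACK_PAGES;		/* 1024 */
	wbc.nr_to_write = budget;
	/* ... writeback consumes part of the budget ... */
	long consumed = budget - wbc.nr_to_write;	/* e.g. 1024 - 224 = 800 */
	work->nr_pages -= consumed;			/* 800 pages charged */
	wrote += consumed;				/* 800 pages accounted */
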
@@ -811,6 +688,7 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
811 | if (!list_empty(&wb->b_more_io)) { | 688 | if (!list_empty(&wb->b_more_io)) { |
812 | inode = list_entry(wb->b_more_io.prev, | 689 | inode = list_entry(wb->b_more_io.prev, |
813 | struct inode, i_list); | 690 | struct inode, i_list); |
691 | trace_wbc_writeback_wait(&wbc, wb->bdi); | ||
814 | inode_wait_for_writeback(inode); | 692 | inode_wait_for_writeback(inode); |
815 | } | 693 | } |
816 | spin_unlock(&inode_lock); | 694 | spin_unlock(&inode_lock); |
@@ -820,31 +698,21 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
820 | } | 698 | } |
821 | 699 | ||
822 | /* | 700 | /* |
823 | * Return the next bdi_work struct that hasn't been processed by this | 701 | * Return the next wb_writeback_work struct that hasn't been processed yet. |
824 | * wb thread yet. ->seen is initially set for each thread that exists | ||
825 | * for this device, when a thread first notices a piece of work it | ||
826 | * clears its bit. Depending on writeback type, the thread will notify | ||
827 | * completion on either receiving the work (WB_SYNC_NONE) or after | ||
828 | * it is done (WB_SYNC_ALL). | ||
829 | */ | 702 | */ |
830 | static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi, | 703 | static struct wb_writeback_work * |
831 | struct bdi_writeback *wb) | 704 | get_next_work_item(struct backing_dev_info *bdi) |
832 | { | 705 | { |
833 | struct bdi_work *work, *ret = NULL; | 706 | struct wb_writeback_work *work = NULL; |
834 | |||
835 | rcu_read_lock(); | ||
836 | |||
837 | list_for_each_entry_rcu(work, &bdi->work_list, list) { | ||
838 | if (!test_bit(wb->nr, &work->seen)) | ||
839 | continue; | ||
840 | clear_bit(wb->nr, &work->seen); | ||
841 | 707 | ||
842 | ret = work; | 708 | spin_lock_bh(&bdi->wb_lock); |
843 | break; | 709 | if (!list_empty(&bdi->work_list)) { |
710 | work = list_entry(bdi->work_list.next, | ||
711 | struct wb_writeback_work, list); | ||
712 | list_del_init(&work->list); | ||
844 | } | 713 | } |
845 | 714 | spin_unlock_bh(&bdi->wb_lock); | |
846 | rcu_read_unlock(); | 715 | return work; |
847 | return ret; | ||
848 | } | 716 | } |
849 | 717 | ||
850 | static long wb_check_old_data_flush(struct bdi_writeback *wb) | 718 | static long wb_check_old_data_flush(struct bdi_writeback *wb) |
@@ -852,6 +720,12 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) | |||
852 | unsigned long expired; | 720 | unsigned long expired; |
853 | long nr_pages; | 721 | long nr_pages; |
854 | 722 | ||
723 | /* | ||
724 | * When set to zero, disable periodic writeback | ||
725 | */ | ||
726 | if (!dirty_writeback_interval) | ||
727 | return 0; | ||
728 | |||
855 | expired = wb->last_old_flush + | 729 | expired = wb->last_old_flush + |
856 | msecs_to_jiffies(dirty_writeback_interval * 10); | 730 | msecs_to_jiffies(dirty_writeback_interval * 10); |
857 | if (time_before(jiffies, expired)) | 731 | if (time_before(jiffies, expired)) |
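
The early return makes a zero dirty_writeback_interval disable periodic writeback outright. The knob is /proc/sys/vm/dirty_writeback_centisecs, hence the multiply-by-10 before msecs_to_jiffies():

	/* dirty_writeback_interval is in centiseconds (default 500): */
	unsigned long t = msecs_to_jiffies(dirty_writeback_interval * 10);
	/* 500 cs * 10 = 5000 ms, i.e. the default 5 second kupdate period */
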
@@ -863,14 +737,14 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) | |||
863 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | 737 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); |
864 | 738 | ||
865 | if (nr_pages) { | 739 | if (nr_pages) { |
866 | struct wb_writeback_args args = { | 740 | struct wb_writeback_work work = { |
867 | .nr_pages = nr_pages, | 741 | .nr_pages = nr_pages, |
868 | .sync_mode = WB_SYNC_NONE, | 742 | .sync_mode = WB_SYNC_NONE, |
869 | .for_kupdate = 1, | 743 | .for_kupdate = 1, |
870 | .range_cyclic = 1, | 744 | .range_cyclic = 1, |
871 | }; | 745 | }; |
872 | 746 | ||
873 | return wb_writeback(wb, &args); | 747 | return wb_writeback(wb, &work); |
874 | } | 748 | } |
875 | 749 | ||
876 | return 0; | 750 | return 0; |
@@ -882,33 +756,29 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) | |||
882 | long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | 756 | long wb_do_writeback(struct bdi_writeback *wb, int force_wait) |
883 | { | 757 | { |
884 | struct backing_dev_info *bdi = wb->bdi; | 758 | struct backing_dev_info *bdi = wb->bdi; |
885 | struct bdi_work *work; | 759 | struct wb_writeback_work *work; |
886 | long wrote = 0; | 760 | long wrote = 0; |
887 | 761 | ||
888 | while ((work = get_next_work_item(bdi, wb)) != NULL) { | 762 | while ((work = get_next_work_item(bdi)) != NULL) { |
889 | struct wb_writeback_args args = work->args; | ||
890 | |||
891 | /* | 763 | /* |
892 | * Override sync mode, in case we must wait for completion | 764 | * Override sync mode, in case we must wait for completion |
765 | * because this thread is exiting now. | ||
893 | */ | 766 | */ |
894 | if (force_wait) | 767 | if (force_wait) |
895 | work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; | 768 | work->sync_mode = WB_SYNC_ALL; |
896 | 769 | ||
897 | /* | 770 | trace_writeback_exec(bdi, work); |
898 | * If this isn't a data integrity operation, just notify | ||
899 | * that we have seen this work and we are now starting it. | ||
900 | */ | ||
901 | if (args.sync_mode == WB_SYNC_NONE) | ||
902 | wb_clear_pending(wb, work); | ||
903 | 771 | ||
904 | wrote += wb_writeback(wb, &args); | 772 | wrote += wb_writeback(wb, work); |
905 | 773 | ||
906 | /* | 774 | /* |
907 | * This is a data integrity writeback, so only do the | 775 | * Notify the caller of completion if this is a synchronous |
908 | * notification when we have completed the work. | 776 | * work item, otherwise just free it. |
909 | */ | 777 | */ |
910 | if (args.sync_mode == WB_SYNC_ALL) | 778 | if (work->done) |
911 | wb_clear_pending(wb, work); | 779 | complete(work->done); |
780 | else | ||
781 | kfree(work); | ||
912 | } | 782 | } |
913 | 783 | ||
914 | /* | 784 | /* |
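
The ->done pointer doubles as the ownership marker for a work item: NULL means it was kzalloc'd in __bdi_start_writeback() and the flusher thread frees it, non-NULL means it lives on a waiting caller's stack and may only be completed. The invariant as a sketch (example_finish_work is a hypothetical name; the logic mirrors the loop above):

	static void example_finish_work(struct wb_writeback_work *work)
	{
		if (work->done)
			complete(work->done);	/* on-stack: the waiter reclaims it */
		else
			kfree(work);		/* queue-owned: free it here */
	}
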
@@ -923,75 +793,88 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait) | |||
923 | * Handle writeback of dirty data for the device backed by this bdi. Also | 793 | * Handle writeback of dirty data for the device backed by this bdi. Also |
924 | * wakes up periodically and does kupdated style flushing. | 794 | * wakes up periodically and does kupdated style flushing. |
925 | */ | 795 | */ |
926 | int bdi_writeback_task(struct bdi_writeback *wb) | 796 | int bdi_writeback_thread(void *data) |
927 | { | 797 | { |
928 | unsigned long last_active = jiffies; | 798 | struct bdi_writeback *wb = data; |
929 | unsigned long wait_jiffies = -1UL; | 799 | struct backing_dev_info *bdi = wb->bdi; |
930 | long pages_written; | 800 | long pages_written; |
931 | 801 | ||
802 | current->flags |= PF_FLUSHER | PF_SWAPWRITE; | ||
803 | set_freezable(); | ||
804 | wb->last_active = jiffies; | ||
805 | |||
806 | /* | ||
807 | * Our parent may run at a different priority, just set us to normal | ||
808 | */ | ||
809 | set_user_nice(current, 0); | ||
810 | |||
811 | trace_writeback_thread_start(bdi); | ||
812 | |||
932 | while (!kthread_should_stop()) { | 813 | while (!kthread_should_stop()) { |
814 | /* | ||
815 | * Remove own delayed wake-up timer, since we are already awake | ||
816 | * and we'll take care of the preriodic write-back. | ||
817 | */ | ||
818 | del_timer(&wb->wakeup_timer); | ||
819 | |||
933 | pages_written = wb_do_writeback(wb, 0); | 820 | pages_written = wb_do_writeback(wb, 0); |
934 | 821 | ||
822 | trace_writeback_pages_written(pages_written); | ||
823 | |||
935 | if (pages_written) | 824 | if (pages_written) |
936 | last_active = jiffies; | 825 | wb->last_active = jiffies; |
937 | else if (wait_jiffies != -1UL) { | 826 | |
938 | unsigned long max_idle; | 827 | set_current_state(TASK_INTERRUPTIBLE); |
828 | if (!list_empty(&bdi->work_list)) { | ||
829 | __set_current_state(TASK_RUNNING); | ||
830 | continue; | ||
831 | } | ||
939 | 832 | ||
833 | if (wb_has_dirty_io(wb) && dirty_writeback_interval) | ||
834 | schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10)); | ||
835 | else { | ||
940 | /* | 836 | /* |
941 | * Longest period of inactivity that we tolerate. If we | 837 | * We have nothing to do, so can go sleep without any |
942 | * see dirty data again later, the task will get | 838 | * timeout and save power. When a work is queued or |
943 | * recreated automatically. | 839 | * something is made dirty - we will be woken up. |
944 | */ | 840 | */ |
945 | max_idle = max(5UL * 60 * HZ, wait_jiffies); | 841 | schedule(); |
946 | if (time_after(jiffies, max_idle + last_active)) | ||
947 | break; | ||
948 | } | 842 | } |
949 | 843 | ||
950 | wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10); | ||
951 | schedule_timeout_interruptible(wait_jiffies); | ||
952 | try_to_freeze(); | 844 | try_to_freeze(); |
953 | } | 845 | } |
954 | 846 | ||
847 | /* Flush any work that raced with us exiting */ | ||
848 | if (!list_empty(&bdi->work_list)) | ||
849 | wb_do_writeback(wb, 1); | ||
850 | |||
851 | trace_writeback_thread_stop(bdi); | ||
955 | return 0; | 852 | return 0; |
956 | } | 853 | } |
957 | 854 | ||
855 | |||
958 | /* | 856 | /* |
959 | * Schedule writeback for all backing devices. This does WB_SYNC_NONE | 857 | * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back |
960 | * writeback, for integrity writeback see bdi_sync_writeback(). | 858 | * the whole world. |
961 | */ | 859 | */ |
962 | static void bdi_writeback_all(struct super_block *sb, long nr_pages) | 860 | void wakeup_flusher_threads(long nr_pages) |
963 | { | 861 | { |
964 | struct wb_writeback_args args = { | ||
965 | .sb = sb, | ||
966 | .nr_pages = nr_pages, | ||
967 | .sync_mode = WB_SYNC_NONE, | ||
968 | }; | ||
969 | struct backing_dev_info *bdi; | 862 | struct backing_dev_info *bdi; |
970 | 863 | ||
971 | rcu_read_lock(); | 864 | if (!nr_pages) { |
865 | nr_pages = global_page_state(NR_FILE_DIRTY) + | ||
866 | global_page_state(NR_UNSTABLE_NFS); | ||
867 | } | ||
972 | 868 | ||
869 | rcu_read_lock(); | ||
973 | list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { | 870 | list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { |
974 | if (!bdi_has_dirty_io(bdi)) | 871 | if (!bdi_has_dirty_io(bdi)) |
975 | continue; | 872 | continue; |
976 | 873 | __bdi_start_writeback(bdi, nr_pages, false, false); | |
977 | bdi_alloc_queue_work(bdi, &args); | ||
978 | } | 874 | } |
979 | |||
980 | rcu_read_unlock(); | 875 | rcu_read_unlock(); |
981 | } | 876 | } |
982 | 877 | ||
983 | /* | ||
984 | * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back | ||
985 | * the whole world. | ||
986 | */ | ||
987 | void wakeup_flusher_threads(long nr_pages) | ||
988 | { | ||
989 | if (nr_pages == 0) | ||
990 | nr_pages = global_page_state(NR_FILE_DIRTY) + | ||
991 | global_page_state(NR_UNSTABLE_NFS); | ||
992 | bdi_writeback_all(NULL, nr_pages); | ||
993 | } | ||
994 | |||
995 | static noinline void block_dump___mark_inode_dirty(struct inode *inode) | 878 | static noinline void block_dump___mark_inode_dirty(struct inode *inode) |
996 | { | 879 | { |
997 | if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { | 880 | if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { |
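
The main loop uses the standard no-lost-wakeup idiom: set TASK_INTERRUPTIBLE first, re-check the work list, and only then sleep, so a bdi_queue_work() plus wake_up_process() racing in between makes schedule() return immediately instead of losing the event. The bare pattern, with work_available() and do_work() as placeholders:

	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (work_available()) {		/* re-check after setting state */
			__set_current_state(TASK_RUNNING);
			do_work();
			continue;
		}
		schedule();	/* returns at once if a wake-up already arrived */
	}
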
@@ -1044,6 +927,8 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode) | |||
1044 | void __mark_inode_dirty(struct inode *inode, int flags) | 927 | void __mark_inode_dirty(struct inode *inode, int flags) |
1045 | { | 928 | { |
1046 | struct super_block *sb = inode->i_sb; | 929 | struct super_block *sb = inode->i_sb; |
930 | struct backing_dev_info *bdi = NULL; | ||
931 | bool wakeup_bdi = false; | ||
1047 | 932 | ||
1048 | /* | 933 | /* |
1049 | * Don't do this for I_DIRTY_PAGES - that doesn't actually | 934 | * Don't do this for I_DIRTY_PAGES - that doesn't actually |
@@ -1089,7 +974,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1089 | if (hlist_unhashed(&inode->i_hash)) | 974 | if (hlist_unhashed(&inode->i_hash)) |
1090 | goto out; | 975 | goto out; |
1091 | } | 976 | } |
1092 | if (inode->i_state & (I_FREEING|I_CLEAR)) | 977 | if (inode->i_state & I_FREEING) |
1093 | goto out; | 978 | goto out; |
1094 | 979 | ||
1095 | /* | 980 | /* |
@@ -1097,22 +982,31 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1097 | * reposition it (that would break b_dirty time-ordering). | 982 | * reposition it (that would break b_dirty time-ordering). |
1098 | */ | 983 | */ |
1099 | if (!was_dirty) { | 984 | if (!was_dirty) { |
1100 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; | 985 | bdi = inode_to_bdi(inode); |
1101 | struct backing_dev_info *bdi = wb->bdi; | 986 | |
1102 | 987 | if (bdi_cap_writeback_dirty(bdi)) { | |
1103 | if (bdi_cap_writeback_dirty(bdi) && | 988 | WARN(!test_bit(BDI_registered, &bdi->state), |
1104 | !test_bit(BDI_registered, &bdi->state)) { | 989 | "bdi-%s not registered\n", bdi->name); |
1105 | WARN_ON(1); | 990 | |
1106 | printk(KERN_ERR "bdi-%s not registered\n", | 991 | /* |
1107 | bdi->name); | 992 | * If this is the first dirty inode for this |
993 | * bdi, we have to wake-up the corresponding | ||
994 | * bdi thread to make sure background | ||
995 | * write-back happens later. | ||
996 | */ | ||
997 | if (!wb_has_dirty_io(&bdi->wb)) | ||
998 | wakeup_bdi = true; | ||
1108 | } | 999 | } |
1109 | 1000 | ||
1110 | inode->dirtied_when = jiffies; | 1001 | inode->dirtied_when = jiffies; |
1111 | list_move(&inode->i_list, &wb->b_dirty); | 1002 | list_move(&inode->i_list, &bdi->wb.b_dirty); |
1112 | } | 1003 | } |
1113 | } | 1004 | } |
1114 | out: | 1005 | out: |
1115 | spin_unlock(&inode_lock); | 1006 | spin_unlock(&inode_lock); |
1007 | |||
1008 | if (wakeup_bdi) | ||
1009 | bdi_wakeup_thread_delayed(bdi); | ||
1116 | } | 1010 | } |
1117 | EXPORT_SYMBOL(__mark_inode_dirty); | 1011 | EXPORT_SYMBOL(__mark_inode_dirty); |
1118 | 1012 | ||
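
The wake-up is deferred until inode_lock is dropped because bdi_wakeup_thread_delayed() takes bdi->wb_lock itself. Assuming the mm/backing-dev.c implementation from the same series, it is roughly:

	/* sketch, not part of this diff */
	void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)
	{
		unsigned long timeout;

		timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
		mod_timer(&bdi->wb.wakeup_timer, jiffies + timeout);
	}
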
@@ -1155,7 +1049,7 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1155 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 1049 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
1156 | struct address_space *mapping; | 1050 | struct address_space *mapping; |
1157 | 1051 | ||
1158 | if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) | 1052 | if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) |
1159 | continue; | 1053 | continue; |
1160 | mapping = inode->i_mapping; | 1054 | mapping = inode->i_mapping; |
1161 | if (mapping->nrpages == 0) | 1055 | if (mapping->nrpages == 0) |
@@ -1196,12 +1090,20 @@ void writeback_inodes_sb(struct super_block *sb) | |||
1196 | { | 1090 | { |
1197 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); | 1091 | unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); |
1198 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); | 1092 | unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); |
1199 | long nr_to_write; | 1093 | DECLARE_COMPLETION_ONSTACK(done); |
1094 | struct wb_writeback_work work = { | ||
1095 | .sb = sb, | ||
1096 | .sync_mode = WB_SYNC_NONE, | ||
1097 | .done = &done, | ||
1098 | }; | ||
1099 | |||
1100 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | ||
1200 | 1101 | ||
1201 | nr_to_write = nr_dirty + nr_unstable + | 1102 | work.nr_pages = nr_dirty + nr_unstable + |
1202 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | 1103 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); |
1203 | 1104 | ||
1204 | bdi_start_writeback(sb->s_bdi, sb, nr_to_write); | 1105 | bdi_queue_work(sb->s_bdi, &work); |
1106 | wait_for_completion(&done); | ||
1205 | } | 1107 | } |
1206 | EXPORT_SYMBOL(writeback_inodes_sb); | 1108 | EXPORT_SYMBOL(writeback_inodes_sb); |
1207 | 1109 | ||
@@ -1215,7 +1117,9 @@ EXPORT_SYMBOL(writeback_inodes_sb); | |||
1215 | int writeback_inodes_sb_if_idle(struct super_block *sb) | 1117 | int writeback_inodes_sb_if_idle(struct super_block *sb) |
1216 | { | 1118 | { |
1217 | if (!writeback_in_progress(sb->s_bdi)) { | 1119 | if (!writeback_in_progress(sb->s_bdi)) { |
1120 | down_read(&sb->s_umount); | ||
1218 | writeback_inodes_sb(sb); | 1121 | writeback_inodes_sb(sb); |
1122 | up_read(&sb->s_umount); | ||
1219 | return 1; | 1123 | return 1; |
1220 | } else | 1124 | } else |
1221 | return 0; | 1125 | return 0; |
@@ -1231,7 +1135,20 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle); | |||
1231 | */ | 1135 | */ |
1232 | void sync_inodes_sb(struct super_block *sb) | 1136 | void sync_inodes_sb(struct super_block *sb) |
1233 | { | 1137 | { |
1234 | bdi_sync_writeback(sb->s_bdi, sb); | 1138 | DECLARE_COMPLETION_ONSTACK(done); |
1139 | struct wb_writeback_work work = { | ||
1140 | .sb = sb, | ||
1141 | .sync_mode = WB_SYNC_ALL, | ||
1142 | .nr_pages = LONG_MAX, | ||
1143 | .range_cyclic = 0, | ||
1144 | .done = &done, | ||
1145 | }; | ||
1146 | |||
1147 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | ||
1148 | |||
1149 | bdi_queue_work(sb->s_bdi, &work); | ||
1150 | wait_for_completion(&done); | ||
1151 | |||
1235 | wait_sb_inodes(sb); | 1152 | wait_sb_inodes(sb); |
1236 | } | 1153 | } |
1237 | EXPORT_SYMBOL(sync_inodes_sb); | 1154 | EXPORT_SYMBOL(sync_inodes_sb); |