Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r--    fs/fs-writeback.c    165
1 file changed, 116 insertions(+), 49 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 8e1e5e19d21e..9d5360c4c2af 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -41,8 +41,9 @@ struct wb_writeback_args {
         long nr_pages;
         struct super_block *sb;
         enum writeback_sync_modes sync_mode;
-        int for_kupdate;
-        int range_cyclic;
+        int for_kupdate:1;
+        int range_cyclic:1;
+        int for_background:1;
 };
 
 /*
@@ -249,14 +250,25 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
  * completion. Caller need not hold sb s_umount semaphore.
  *
  */
-void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
+void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
+                         long nr_pages)
 {
         struct wb_writeback_args args = {
+                .sb = sb,
                 .sync_mode = WB_SYNC_NONE,
                 .nr_pages = nr_pages,
                 .range_cyclic = 1,
         };
 
+        /*
+         * We treat @nr_pages=0 as the special case to do background writeback,
+         * ie. to sync pages until the background dirty threshold is reached.
+         */
+        if (!nr_pages) {
+                args.nr_pages = LONG_MAX;
+                args.for_background = 1;
+        }
+
         bdi_alloc_queue_work(bdi, &args);
 }
 
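The hunk above changes the bdi_start_writeback() calling convention: callers now pass the super_block (or NULL) and may pass nr_pages == 0 to request background writeback. As an illustration only (this caller is not part of the patch and the function name is invented for the example), a minimal sketch of a call site using the new convention:

/*
 * Hypothetical caller: kick background writeback on a bdi. Passing
 * nr_pages == 0 makes bdi_start_writeback() queue work with
 * args.nr_pages = LONG_MAX and args.for_background = 1, i.e. write
 * until the background dirty threshold is no longer exceeded.
 */
static void example_kick_background_writeback(struct backing_dev_info *bdi)
{
        /* sb == NULL: the writeback is not restricted to one super_block */
        bdi_start_writeback(bdi, NULL, 0);
}

The writeback_inodes_sb() change at the end of this diff uses the same entry point, with an explicit sb and a computed nr_pages.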
@@ -310,7 +322,7 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
          * For inodes being constantly redirtied, dirtied_when can get stuck.
          * It _appears_ to be in the future, but is actually in distant past.
          * This test is necessary to prevent such wrapped-around relative times
-         * from permanently stopping the whole pdflush writeback.
+         * from permanently stopping the whole bdi writeback.
          */
         ret = ret && time_before_eq(inode->dirtied_when, jiffies);
 #endif
@@ -324,13 +336,38 @@ static void move_expired_inodes(struct list_head *delaying_queue,
                                 struct list_head *dispatch_queue,
                                 unsigned long *older_than_this)
 {
+        LIST_HEAD(tmp);
+        struct list_head *pos, *node;
+        struct super_block *sb = NULL;
+        struct inode *inode;
+        int do_sb_sort = 0;
+
         while (!list_empty(delaying_queue)) {
-                struct inode *inode = list_entry(delaying_queue->prev,
-                                                 struct inode, i_list);
+                inode = list_entry(delaying_queue->prev, struct inode, i_list);
                 if (older_than_this &&
                     inode_dirtied_after(inode, *older_than_this))
                         break;
-                list_move(&inode->i_list, dispatch_queue);
+                if (sb && sb != inode->i_sb)
+                        do_sb_sort = 1;
+                sb = inode->i_sb;
+                list_move(&inode->i_list, &tmp);
+        }
+
+        /* just one sb in list, splice to dispatch_queue and we're done */
+        if (!do_sb_sort) {
+                list_splice(&tmp, dispatch_queue);
+                return;
+        }
+
+        /* Move inodes from one superblock together */
+        while (!list_empty(&tmp)) {
+                inode = list_entry(tmp.prev, struct inode, i_list);
+                sb = inode->i_sb;
+                list_for_each_prev_safe(pos, node, &tmp) {
+                        inode = list_entry(pos, struct inode, i_list);
+                        if (inode->i_sb == sb)
+                                list_move(&inode->i_list, dispatch_queue);
+                }
         }
 }
 
@@ -439,8 +476,18 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
         spin_lock(&inode_lock);
         inode->i_state &= ~I_SYNC;
         if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
-                if (!(inode->i_state & I_DIRTY) &&
-                    mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+                if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) {
+                        /*
+                         * More pages get dirtied by a fast dirtier.
+                         */
+                        goto select_queue;
+                } else if (inode->i_state & I_DIRTY) {
+                        /*
+                         * At least XFS will redirty the inode during the
+                         * writeback (delalloc) and on io completion (isize).
+                         */
+                        redirty_tail(inode);
+                } else if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
                         /*
                          * We didn't write back all the pages. nfs_writepages()
                          * sometimes bales out without doing anything. Redirty
@@ -462,6 +509,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                          * soon as the queue becomes uncongested.
                          */
                         inode->i_state |= I_DIRTY_PAGES;
+select_queue:
                         if (wbc->nr_to_write <= 0) {
                                 /*
                                  * slice used up: queue for next turn
@@ -484,12 +532,6 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                                 inode->i_state |= I_DIRTY_PAGES;
                                 redirty_tail(inode);
                         }
-                } else if (inode->i_state & I_DIRTY) {
-                        /*
-                         * Someone redirtied the inode while were writing back
-                         * the pages.
-                         */
-                        redirty_tail(inode);
                 } else if (atomic_read(&inode->i_count)) {
                         /*
                          * The inode is clean, inuse
@@ -506,6 +548,17 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
         return ret;
 }
 
+static void unpin_sb_for_writeback(struct super_block **psb)
+{
+        struct super_block *sb = *psb;
+
+        if (sb) {
+                up_read(&sb->s_umount);
+                put_super(sb);
+                *psb = NULL;
+        }
+}
+
 /*
  * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
  * before calling writeback. So make sure that we do pin it, so it doesn't
@@ -515,11 +568,20 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
  * 1 if we failed.
  */
 static int pin_sb_for_writeback(struct writeback_control *wbc,
-                                struct inode *inode)
+                                struct inode *inode, struct super_block **psb)
 {
         struct super_block *sb = inode->i_sb;
 
         /*
+         * If this sb is already pinned, nothing more to do. If not and
+         * *psb is non-NULL, unpin the old one first
+         */
+        if (sb == *psb)
+                return 0;
+        else if (*psb)
+                unpin_sb_for_writeback(psb);
+
+        /*
          * Caller must already hold the ref for this
          */
         if (wbc->sync_mode == WB_SYNC_ALL) {
@@ -532,7 +594,7 @@ static int pin_sb_for_writeback(struct writeback_control *wbc,
         if (down_read_trylock(&sb->s_umount)) {
                 if (sb->s_root) {
                         spin_unlock(&sb_lock);
-                        return 0;
+                        goto pinned;
                 }
                 /*
                  * umounted, drop rwsem again and fall through to failure
@@ -543,24 +605,15 @@ static int pin_sb_for_writeback(struct writeback_control *wbc,
         sb->s_count--;
         spin_unlock(&sb_lock);
         return 1;
-}
-
-static void unpin_sb_for_writeback(struct writeback_control *wbc,
-                                   struct inode *inode)
-{
-        struct super_block *sb = inode->i_sb;
-
-        if (wbc->sync_mode == WB_SYNC_ALL)
-                return;
-
-        up_read(&sb->s_umount);
-        put_super(sb);
+pinned:
+        *psb = sb;
+        return 0;
 }
 
 static void writeback_inodes_wb(struct bdi_writeback *wb,
                                 struct writeback_control *wbc)
 {
-        struct super_block *sb = wbc->sb;
+        struct super_block *sb = wbc->sb, *pin_sb = NULL;
         const int is_blkdev_sb = sb_is_blkdev_sb(sb);
         const unsigned long start = jiffies; /* livelock avoidance */
 
@@ -619,7 +672,7 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
                 if (inode_dirtied_after(inode, start))
                         break;
 
-                if (pin_sb_for_writeback(wbc, inode)) {
+                if (pin_sb_for_writeback(wbc, inode, &pin_sb)) {
                         requeue_io(inode);
                         continue;
                 }
@@ -628,7 +681,6 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
                 __iget(inode);
                 pages_skipped = wbc->pages_skipped;
                 writeback_single_inode(inode, wbc);
-                unpin_sb_for_writeback(wbc, inode);
                 if (wbc->pages_skipped != pages_skipped) {
                         /*
                          * writeback is not making progress due to locked
@@ -648,6 +700,8 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
                         wbc->more_io = 1;
         }
 
+        unpin_sb_for_writeback(&pin_sb);
+
         spin_unlock(&inode_lock);
         /* Leave any unwritten inodes on b_io */
 }
@@ -706,6 +760,7 @@ static long wb_writeback(struct bdi_writeback *wb,
         };
         unsigned long oldest_jif;
         long wrote = 0;
+        struct inode *inode;
 
         if (wbc.for_kupdate) {
                 wbc.older_than_this = &oldest_jif;
@@ -719,20 +774,16 @@ static long wb_writeback(struct bdi_writeback *wb,
 
         for (;;) {
                 /*
-                 * Don't flush anything for non-integrity writeback where
-                 * no nr_pages was given
+                 * Stop writeback when nr_pages has been consumed
                  */
-                if (!args->for_kupdate && args->nr_pages <= 0 &&
-                    args->sync_mode == WB_SYNC_NONE)
+                if (args->nr_pages <= 0)
                         break;
 
                 /*
-                 * If no specific pages were given and this is just a
-                 * periodic background writeout and we are below the
-                 * background dirty threshold, don't do anything
+                 * For background writeout, stop when we are below the
+                 * background dirty threshold
                  */
-                if (args->for_kupdate && args->nr_pages <= 0 &&
-                    !over_bground_thresh())
+                if (args->for_background && !over_bground_thresh())
                         break;
 
                 wbc.more_io = 0;
@@ -744,13 +795,32 @@ static long wb_writeback(struct bdi_writeback *wb,
                 wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 
                 /*
-                 * If we ran out of stuff to write, bail unless more_io got set
+                 * If we consumed everything, see if we have more
                  */
-                if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
-                        if (wbc.more_io && !wbc.for_kupdate)
-                                continue;
+                if (wbc.nr_to_write <= 0)
+                        continue;
+                /*
+                 * Didn't write everything and we don't have more IO, bail
+                 */
+                if (!wbc.more_io)
                         break;
+                /*
+                 * Did we write something? Try for more
+                 */
+                if (wbc.nr_to_write < MAX_WRITEBACK_PAGES)
+                        continue;
+                /*
+                 * Nothing written. Wait for some inode to
+                 * become available for writeback. Otherwise
+                 * we'll just busyloop.
+                 */
+                spin_lock(&inode_lock);
+                if (!list_empty(&wb->b_more_io)) {
+                        inode = list_entry(wb->b_more_io.prev,
+                                           struct inode, i_list);
+                        inode_wait_for_writeback(inode);
                 }
+                spin_unlock(&inode_lock);
         }
 
         return wrote;
@@ -1060,9 +1130,6 @@ EXPORT_SYMBOL(__mark_inode_dirty);
  * If older_than_this is non-NULL, then only write out inodes which
  * had their first dirtying at a time earlier than *older_than_this.
  *
- * If we're a pdlfush thread, then implement pdflush collision avoidance
- * against the entire list.
- *
 * If `bdi' is non-zero then we're being asked to writeback a specific queue.
 * This function assumes that the blockdev superblock's inodes are backed by
 * a variety of queues, so all inodes are searched. For other superblocks,
@@ -1141,7 +1208,7 @@ void writeback_inodes_sb(struct super_block *sb)
         nr_to_write = nr_dirty + nr_unstable +
                         (inodes_stat.nr_inodes - inodes_stat.nr_unused);
 
-        bdi_writeback_all(sb, nr_to_write);
+        bdi_start_writeback(sb->s_bdi, sb, nr_to_write);
 }
 EXPORT_SYMBOL(writeback_inodes_sb);
 
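The two wb_writeback() hunks above replace the old single exit test with a small decision ladder that runs after every writeback pass. As a reading aid only (this helper does not exist in the kernel; its name and return values are invented for the sketch), the per-iteration logic can be summarized as:

/*
 * Illustrative summary of the loop-control logic in wb_writeback()
 * after this patch: 0 = stop, 1 = run another pass immediately,
 * 2 = wait for an inode on b_more_io before retrying.
 */
static int wb_writeback_next_step(long nr_to_write_left, int more_io)
{
        if (nr_to_write_left <= 0)
                return 1;       /* full slice written: see if there is more */
        if (!more_io)
                return 0;       /* wrote less and nothing pending: bail */
        if (nr_to_write_left < MAX_WRITEBACK_PAGES)
                return 1;       /* some progress was made: try for more */
        return 2;               /* nothing written: wait on b_more_io */
}

The waiting case is what keeps the flusher thread from busylooping when every remaining inode is temporarily held under I_SYNC.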