aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2009-09-25 12:27:30 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-09-25 12:27:30 -0400
commit 6d7f18f6ea3a13af95bdf507fc54d42b165e1712 (patch)
tree 8f6f3a6d46835aa767823fa7049609408a87afc2 /fs
parent 53cddfcc0e760d2b364878b6dadbd0c6d087cfae (diff)
parent 56a131dcf7ed36c3c6e36bea448b674ea85ed5bb (diff)
Merge branch 'writeback' of git://git.kernel.dk/linux-2.6-block
* 'writeback' of git://git.kernel.dk/linux-2.6-block:
  writeback: writeback_inodes_sb() should use bdi_start_writeback()
  writeback: don't delay inodes redirtied by a fast dirtier
  writeback: make the super_block pinning more efficient
  writeback: don't resort for a single super_block in move_expired_inodes()
  writeback: move inodes from one super_block together
  writeback: get rid to incorrect references to pdflush in comments
  writeback: improve readability of the wb_writeback() continue/break logic
  writeback: cleanup writeback_single_inode()
  writeback: kupdate writeback shall not stop when more io is possible
  writeback: stop background writeback when below background threshold
  writeback: balance_dirty_pages() shall write more than dirtied pages
  fs: Fix busyloop in wb_writeback()
Diffstat (limited to 'fs')
-rw-r--r-- fs/buffer.c 10
-rw-r--r-- fs/fs-writeback.c 161
2 files changed, 118 insertions, 53 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 24afd7422ae8..6fa530256bfd 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -280,7 +280,7 @@ void invalidate_bdev(struct block_device *bdev)
280EXPORT_SYMBOL(invalidate_bdev); 280EXPORT_SYMBOL(invalidate_bdev);
281 281
282/* 282/*
283 * Kick pdflush then try to free up some ZONE_NORMAL memory. 283 * Kick the writeback threads then try to free up some ZONE_NORMAL memory.
284 */ 284 */
285static void free_more_memory(void) 285static void free_more_memory(void)
286{ 286{
@@ -1709,9 +1709,9 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
1709 /* 1709 /*
1710 * If it's a fully non-blocking write attempt and we cannot 1710 * If it's a fully non-blocking write attempt and we cannot
1711 * lock the buffer then redirty the page. Note that this can 1711 * lock the buffer then redirty the page. Note that this can
1712 * potentially cause a busy-wait loop from pdflush and kswapd 1712 * potentially cause a busy-wait loop from writeback threads
1713 * activity, but those code paths have their own higher-level 1713 * and kswapd activity, but those code paths have their own
1714 * throttling. 1714 * higher-level throttling.
1715 */ 1715 */
1716 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { 1716 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1717 lock_buffer(bh); 1717 lock_buffer(bh);
@@ -3208,7 +3208,7 @@ EXPORT_SYMBOL(block_sync_page);
3208 * still running obsolete flush daemons, so we terminate them here. 3208 * still running obsolete flush daemons, so we terminate them here.
3209 * 3209 *
3210 * Use of bdflush() is deprecated and will be removed in a future kernel. 3210 * Use of bdflush() is deprecated and will be removed in a future kernel.
3211 * The `pdflush' kernel threads fully replace bdflush daemons and this call. 3211 * The `flush-X' kernel threads fully replace bdflush daemons and this call.
3212 */ 3212 */
3213SYSCALL_DEFINE2(bdflush, int, func, long, data) 3213SYSCALL_DEFINE2(bdflush, int, func, long, data)
3214{ 3214{
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 8e1e5e19d21e..fb61178c86e3 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -41,8 +41,9 @@ struct wb_writeback_args {
41 long nr_pages; 41 long nr_pages;
42 struct super_block *sb; 42 struct super_block *sb;
43 enum writeback_sync_modes sync_mode; 43 enum writeback_sync_modes sync_mode;
44 int for_kupdate; 44 int for_kupdate:1;
45 int range_cyclic; 45 int range_cyclic:1;
46 int for_background:1;
46}; 47};
47 48
48/* 49/*
@@ -257,6 +258,15 @@ void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
257 .range_cyclic = 1, 258 .range_cyclic = 1,
258 }; 259 };
259 260
261 /*
262 * We treat @nr_pages=0 as the special case to do background writeback,
263 * ie. to sync pages until the background dirty threshold is reached.
264 */
265 if (!nr_pages) {
266 args.nr_pages = LONG_MAX;
267 args.for_background = 1;
268 }
269
260 bdi_alloc_queue_work(bdi, &args); 270 bdi_alloc_queue_work(bdi, &args);
261} 271}
262 272
@@ -310,7 +320,7 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
310 * For inodes being constantly redirtied, dirtied_when can get stuck. 320 * For inodes being constantly redirtied, dirtied_when can get stuck.
311 * It _appears_ to be in the future, but is actually in distant past. 321 * It _appears_ to be in the future, but is actually in distant past.
312 * This test is necessary to prevent such wrapped-around relative times 322 * This test is necessary to prevent such wrapped-around relative times
313 * from permanently stopping the whole pdflush writeback. 323 * from permanently stopping the whole bdi writeback.
314 */ 324 */
315 ret = ret && time_before_eq(inode->dirtied_when, jiffies); 325 ret = ret && time_before_eq(inode->dirtied_when, jiffies);
316#endif 326#endif
@@ -324,13 +334,38 @@ static void move_expired_inodes(struct list_head *delaying_queue,
324 struct list_head *dispatch_queue, 334 struct list_head *dispatch_queue,
325 unsigned long *older_than_this) 335 unsigned long *older_than_this)
326{ 336{
337 LIST_HEAD(tmp);
338 struct list_head *pos, *node;
339 struct super_block *sb = NULL;
340 struct inode *inode;
341 int do_sb_sort = 0;
342
327 while (!list_empty(delaying_queue)) { 343 while (!list_empty(delaying_queue)) {
328 struct inode *inode = list_entry(delaying_queue->prev, 344 inode = list_entry(delaying_queue->prev, struct inode, i_list);
329 struct inode, i_list);
330 if (older_than_this && 345 if (older_than_this &&
331 inode_dirtied_after(inode, *older_than_this)) 346 inode_dirtied_after(inode, *older_than_this))
332 break; 347 break;
333 list_move(&inode->i_list, dispatch_queue); 348 if (sb && sb != inode->i_sb)
349 do_sb_sort = 1;
350 sb = inode->i_sb;
351 list_move(&inode->i_list, &tmp);
352 }
353
354 /* just one sb in list, splice to dispatch_queue and we're done */
355 if (!do_sb_sort) {
356 list_splice(&tmp, dispatch_queue);
357 return;
358 }
359
360 /* Move inodes from one superblock together */
361 while (!list_empty(&tmp)) {
362 inode = list_entry(tmp.prev, struct inode, i_list);
363 sb = inode->i_sb;
364 list_for_each_prev_safe(pos, node, &tmp) {
365 inode = list_entry(pos, struct inode, i_list);
366 if (inode->i_sb == sb)
367 list_move(&inode->i_list, dispatch_queue);
368 }
334 } 369 }
335} 370}
336 371
@@ -439,8 +474,18 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
439 spin_lock(&inode_lock); 474 spin_lock(&inode_lock);
440 inode->i_state &= ~I_SYNC; 475 inode->i_state &= ~I_SYNC;
441 if (!(inode->i_state & (I_FREEING | I_CLEAR))) { 476 if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
442 if (!(inode->i_state & I_DIRTY) && 477 if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) {
443 mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { 478 /*
479 * More pages get dirtied by a fast dirtier.
480 */
481 goto select_queue;
482 } else if (inode->i_state & I_DIRTY) {
483 /*
484 * At least XFS will redirty the inode during the
485 * writeback (delalloc) and on io completion (isize).
486 */
487 redirty_tail(inode);
488 } else if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
444 /* 489 /*
445 * We didn't write back all the pages. nfs_writepages() 490 * We didn't write back all the pages. nfs_writepages()
446 * sometimes bales out without doing anything. Redirty 491 * sometimes bales out without doing anything. Redirty
@@ -462,6 +507,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
462 * soon as the queue becomes uncongested. 507 * soon as the queue becomes uncongested.
463 */ 508 */
464 inode->i_state |= I_DIRTY_PAGES; 509 inode->i_state |= I_DIRTY_PAGES;
510select_queue:
465 if (wbc->nr_to_write <= 0) { 511 if (wbc->nr_to_write <= 0) {
466 /* 512 /*
467 * slice used up: queue for next turn 513 * slice used up: queue for next turn
@@ -484,12 +530,6 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
484 inode->i_state |= I_DIRTY_PAGES; 530 inode->i_state |= I_DIRTY_PAGES;
485 redirty_tail(inode); 531 redirty_tail(inode);
486 } 532 }
487 } else if (inode->i_state & I_DIRTY) {
488 /*
489 * Someone redirtied the inode while were writing back
490 * the pages.
491 */
492 redirty_tail(inode);
493 } else if (atomic_read(&inode->i_count)) { 533 } else if (atomic_read(&inode->i_count)) {
494 /* 534 /*
495 * The inode is clean, inuse 535 * The inode is clean, inuse
@@ -506,6 +546,17 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
506 return ret; 546 return ret;
507} 547}
508 548
549static void unpin_sb_for_writeback(struct super_block **psb)
550{
551 struct super_block *sb = *psb;
552
553 if (sb) {
554 up_read(&sb->s_umount);
555 put_super(sb);
556 *psb = NULL;
557 }
558}
559
509/* 560/*
510 * For WB_SYNC_NONE writeback, the caller does not have the sb pinned 561 * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
511 * before calling writeback. So make sure that we do pin it, so it doesn't 562 * before calling writeback. So make sure that we do pin it, so it doesn't
@@ -515,11 +566,20 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
515 * 1 if we failed. 566 * 1 if we failed.
516 */ 567 */
517static int pin_sb_for_writeback(struct writeback_control *wbc, 568static int pin_sb_for_writeback(struct writeback_control *wbc,
518 struct inode *inode) 569 struct inode *inode, struct super_block **psb)
519{ 570{
520 struct super_block *sb = inode->i_sb; 571 struct super_block *sb = inode->i_sb;
521 572
522 /* 573 /*
574 * If this sb is already pinned, nothing more to do. If not and
575 * *psb is non-NULL, unpin the old one first
576 */
577 if (sb == *psb)
578 return 0;
579 else if (*psb)
580 unpin_sb_for_writeback(psb);
581
582 /*
523 * Caller must already hold the ref for this 583 * Caller must already hold the ref for this
524 */ 584 */
525 if (wbc->sync_mode == WB_SYNC_ALL) { 585 if (wbc->sync_mode == WB_SYNC_ALL) {
@@ -532,7 +592,7 @@ static int pin_sb_for_writeback(struct writeback_control *wbc,
532 if (down_read_trylock(&sb->s_umount)) { 592 if (down_read_trylock(&sb->s_umount)) {
533 if (sb->s_root) { 593 if (sb->s_root) {
534 spin_unlock(&sb_lock); 594 spin_unlock(&sb_lock);
535 return 0; 595 goto pinned;
536 } 596 }
537 /* 597 /*
538 * umounted, drop rwsem again and fall through to failure 598 * umounted, drop rwsem again and fall through to failure
@@ -543,24 +603,15 @@ static int pin_sb_for_writeback(struct writeback_control *wbc,
543 sb->s_count--; 603 sb->s_count--;
544 spin_unlock(&sb_lock); 604 spin_unlock(&sb_lock);
545 return 1; 605 return 1;
546} 606pinned:
547 607 *psb = sb;
548static void unpin_sb_for_writeback(struct writeback_control *wbc, 608 return 0;
549 struct inode *inode)
550{
551 struct super_block *sb = inode->i_sb;
552
553 if (wbc->sync_mode == WB_SYNC_ALL)
554 return;
555
556 up_read(&sb->s_umount);
557 put_super(sb);
558} 609}
559 610
560static void writeback_inodes_wb(struct bdi_writeback *wb, 611static void writeback_inodes_wb(struct bdi_writeback *wb,
561 struct writeback_control *wbc) 612 struct writeback_control *wbc)
562{ 613{
563 struct super_block *sb = wbc->sb; 614 struct super_block *sb = wbc->sb, *pin_sb = NULL;
564 const int is_blkdev_sb = sb_is_blkdev_sb(sb); 615 const int is_blkdev_sb = sb_is_blkdev_sb(sb);
565 const unsigned long start = jiffies; /* livelock avoidance */ 616 const unsigned long start = jiffies; /* livelock avoidance */
566 617
@@ -619,7 +670,7 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
619 if (inode_dirtied_after(inode, start)) 670 if (inode_dirtied_after(inode, start))
620 break; 671 break;
621 672
622 if (pin_sb_for_writeback(wbc, inode)) { 673 if (pin_sb_for_writeback(wbc, inode, &pin_sb)) {
623 requeue_io(inode); 674 requeue_io(inode);
624 continue; 675 continue;
625 } 676 }
@@ -628,7 +679,6 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
628 __iget(inode); 679 __iget(inode);
629 pages_skipped = wbc->pages_skipped; 680 pages_skipped = wbc->pages_skipped;
630 writeback_single_inode(inode, wbc); 681 writeback_single_inode(inode, wbc);
631 unpin_sb_for_writeback(wbc, inode);
632 if (wbc->pages_skipped != pages_skipped) { 682 if (wbc->pages_skipped != pages_skipped) {
633 /* 683 /*
634 * writeback is not making progress due to locked 684 * writeback is not making progress due to locked
@@ -648,6 +698,8 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
648 wbc->more_io = 1; 698 wbc->more_io = 1;
649 } 699 }
650 700
701 unpin_sb_for_writeback(&pin_sb);
702
651 spin_unlock(&inode_lock); 703 spin_unlock(&inode_lock);
652 /* Leave any unwritten inodes on b_io */ 704 /* Leave any unwritten inodes on b_io */
653} 705}
@@ -706,6 +758,7 @@ static long wb_writeback(struct bdi_writeback *wb,
706 }; 758 };
707 unsigned long oldest_jif; 759 unsigned long oldest_jif;
708 long wrote = 0; 760 long wrote = 0;
761 struct inode *inode;
709 762
710 if (wbc.for_kupdate) { 763 if (wbc.for_kupdate) {
711 wbc.older_than_this = &oldest_jif; 764 wbc.older_than_this = &oldest_jif;
@@ -719,20 +772,16 @@ static long wb_writeback(struct bdi_writeback *wb,
719 772
720 for (;;) { 773 for (;;) {
721 /* 774 /*
722 * Don't flush anything for non-integrity writeback where 775 * Stop writeback when nr_pages has been consumed
723 * no nr_pages was given
724 */ 776 */
725 if (!args->for_kupdate && args->nr_pages <= 0 && 777 if (args->nr_pages <= 0)
726 args->sync_mode == WB_SYNC_NONE)
727 break; 778 break;
728 779
729 /* 780 /*
730 * If no specific pages were given and this is just a 781 * For background writeout, stop when we are below the
731 * periodic background writeout and we are below the 782 * background dirty threshold
732 * background dirty threshold, don't do anything
733 */ 783 */
734 if (args->for_kupdate && args->nr_pages <= 0 && 784 if (args->for_background && !over_bground_thresh())
735 !over_bground_thresh())
736 break; 785 break;
737 786
738 wbc.more_io = 0; 787 wbc.more_io = 0;
@@ -744,13 +793,32 @@ static long wb_writeback(struct bdi_writeback *wb,
744 wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; 793 wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
745 794
746 /* 795 /*
747 * If we ran out of stuff to write, bail unless more_io got set 796 * If we consumed everything, see if we have more
748 */ 797 */
749 if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { 798 if (wbc.nr_to_write <= 0)
750 if (wbc.more_io && !wbc.for_kupdate) 799 continue;
751 continue; 800 /*
801 * Didn't write everything and we don't have more IO, bail
802 */
803 if (!wbc.more_io)
752 break; 804 break;
805 /*
806 * Did we write something? Try for more
807 */
808 if (wbc.nr_to_write < MAX_WRITEBACK_PAGES)
809 continue;
810 /*
811 * Nothing written. Wait for some inode to
812 * become available for writeback. Otherwise
813 * we'll just busyloop.
814 */
815 spin_lock(&inode_lock);
816 if (!list_empty(&wb->b_more_io)) {
817 inode = list_entry(wb->b_more_io.prev,
818 struct inode, i_list);
819 inode_wait_for_writeback(inode);
753 } 820 }
821 spin_unlock(&inode_lock);
754 } 822 }
755 823
756 return wrote; 824 return wrote;
@@ -1060,9 +1128,6 @@ EXPORT_SYMBOL(__mark_inode_dirty);
1060 * If older_than_this is non-NULL, then only write out inodes which 1128 * If older_than_this is non-NULL, then only write out inodes which
1061 * had their first dirtying at a time earlier than *older_than_this. 1129 * had their first dirtying at a time earlier than *older_than_this.
1062 * 1130 *
1063 * If we're a pdlfush thread, then implement pdflush collision avoidance
1064 * against the entire list.
1065 *
1066 * If `bdi' is non-zero then we're being asked to writeback a specific queue. 1131 * If `bdi' is non-zero then we're being asked to writeback a specific queue.
1067 * This function assumes that the blockdev superblock's inodes are backed by 1132 * This function assumes that the blockdev superblock's inodes are backed by
1068 * a variety of queues, so all inodes are searched. For other superblocks, 1133 * a variety of queues, so all inodes are searched. For other superblocks,
@@ -1141,7 +1206,7 @@ void writeback_inodes_sb(struct super_block *sb)
1141 nr_to_write = nr_dirty + nr_unstable + 1206 nr_to_write = nr_dirty + nr_unstable +
1142 (inodes_stat.nr_inodes - inodes_stat.nr_unused); 1207 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
1143 1208
1144 bdi_writeback_all(sb, nr_to_write); 1209 bdi_start_writeback(sb->s_bdi, nr_to_write);
1145} 1210}
1146EXPORT_SYMBOL(writeback_inodes_sb); 1211EXPORT_SYMBOL(writeback_inodes_sb);
1147 1212