aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Edward Shishkin <edward.shishkin@gmail.com> 2010-03-11 17:09:47 -0500
committer Jens Axboe <jens.axboe@oracle.com> 2010-03-12 04:03:42 -0500
commit f11c9c5c259cb2c3d698548dc3936f773ab1f5b9 (patch)
tree c6461c9d6981122e1507dafa0394901903eb2ca1
parent c12ec0a2d94001003dfb929ce14c287fca0522b0 (diff)
vfs: improve writeback_inodes_wb()
Do not pin/unpin the superblock for every inode in writeback_inodes_wb(). Instead, pin it once for the whole group of inodes which belong to the same superblock and call the writeback_sb_inodes() handler for them.

Signed-off-by: Edward Shishkin <edward.shishkin@gmail.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r-- fs/fs-writeback.c 133
-rw-r--r-- include/linux/writeback.h 3
2 files changed, 76 insertions, 60 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 76fc4d594acb..6841effa47ca 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -553,108 +553,85 @@ select_queue:
553 return ret; 553 return ret;
554} 554}
555 555
556static void unpin_sb_for_writeback(struct super_block **psb) 556static void unpin_sb_for_writeback(struct super_block *sb)
557{ 557{
558 struct super_block *sb = *psb; 558 up_read(&sb->s_umount);
559 559 put_super(sb);
560 if (sb) {
561 up_read(&sb->s_umount);
562 put_super(sb);
563 *psb = NULL;
564 }
565} 560}
566 561
562enum sb_pin_state {
563 SB_PINNED,
564 SB_NOT_PINNED,
565 SB_PIN_FAILED
566};
567
567/* 568/*
568 * For WB_SYNC_NONE writeback, the caller does not have the sb pinned 569 * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
569 * before calling writeback. So make sure that we do pin it, so it doesn't 570 * before calling writeback. So make sure that we do pin it, so it doesn't
570 * go away while we are writing inodes from it. 571 * go away while we are writing inodes from it.
571 *
572 * Returns 0 if the super was successfully pinned (or pinning wasn't needed),
573 * 1 if we failed.
574 */ 572 */
575static int pin_sb_for_writeback(struct writeback_control *wbc, 573static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
576 struct inode *inode, struct super_block **psb) 574 struct super_block *sb)
577{ 575{
578 struct super_block *sb = inode->i_sb;
579
580 /*
581 * If this sb is already pinned, nothing more to do. If not and
582 * *psb is non-NULL, unpin the old one first
583 */
584 if (sb == *psb)
585 return 0;
586 else if (*psb)
587 unpin_sb_for_writeback(psb);
588
589 /* 576 /*
590 * Caller must already hold the ref for this 577 * Caller must already hold the ref for this
591 */ 578 */
592 if (wbc->sync_mode == WB_SYNC_ALL) { 579 if (wbc->sync_mode == WB_SYNC_ALL) {
593 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 580 WARN_ON(!rwsem_is_locked(&sb->s_umount));
594 return 0; 581 return SB_NOT_PINNED;
595 } 582 }
596
597 spin_lock(&sb_lock); 583 spin_lock(&sb_lock);
598 sb->s_count++; 584 sb->s_count++;
599 if (down_read_trylock(&sb->s_umount)) { 585 if (down_read_trylock(&sb->s_umount)) {
600 if (sb->s_root) { 586 if (sb->s_root) {
601 spin_unlock(&sb_lock); 587 spin_unlock(&sb_lock);
602 goto pinned; 588 return SB_PINNED;
603 } 589 }
604 /* 590 /*
605 * umounted, drop rwsem again and fall through to failure 591 * umounted, drop rwsem again and fall through to failure
606 */ 592 */
607 up_read(&sb->s_umount); 593 up_read(&sb->s_umount);
608 } 594 }
609
610 sb->s_count--; 595 sb->s_count--;
611 spin_unlock(&sb_lock); 596 spin_unlock(&sb_lock);
612 return 1; 597 return SB_PIN_FAILED;
613pinned:
614 *psb = sb;
615 return 0;
616} 598}
617 599
618static void writeback_inodes_wb(struct bdi_writeback *wb, 600/*
619 struct writeback_control *wbc) 601 * Write a portion of b_io inodes which belong to @sb.
602 * If @wbc->sb != NULL, then find and write all such
603 * inodes. Otherwise write only ones which go sequentially
604 * in reverse order.
605 * Return 1, if the caller writeback routine should be
606 * interrupted. Otherwise return 0.
607 */
608static int writeback_sb_inodes(struct super_block *sb,
609 struct bdi_writeback *wb,
610 struct writeback_control *wbc)
620{ 611{
621 struct super_block *sb = wbc->sb, *pin_sb = NULL;
622 const unsigned long start = jiffies; /* livelock avoidance */
623
624 spin_lock(&inode_lock);
625
626 if (!wbc->for_kupdate || list_empty(&wb->b_io))
627 queue_io(wb, wbc->older_than_this);
628
629 while (!list_empty(&wb->b_io)) { 612 while (!list_empty(&wb->b_io)) {
630 struct inode *inode = list_entry(wb->b_io.prev,
631 struct inode, i_list);
632 long pages_skipped; 613 long pages_skipped;
633 614 struct inode *inode = list_entry(wb->b_io.prev,
634 /* 615 struct inode, i_list);
635 * super block given and doesn't match, skip this inode 616 if (wbc->sb && sb != inode->i_sb) {
636 */ 617 /* super block given and doesn't
637 if (sb && sb != inode->i_sb) { 618 match, skip this inode */
638 redirty_tail(inode); 619 redirty_tail(inode);
639 continue; 620 continue;
640 } 621 }
641 622 if (sb != inode->i_sb)
623 /* finish with this superblock */
624 return 0;
642 if (inode->i_state & (I_NEW | I_WILL_FREE)) { 625 if (inode->i_state & (I_NEW | I_WILL_FREE)) {
643 requeue_io(inode); 626 requeue_io(inode);
644 continue; 627 continue;
645 } 628 }
646
647 /* 629 /*
648 * Was this inode dirtied after sync_sb_inodes was called? 630 * Was this inode dirtied after sync_sb_inodes was called?
649 * This keeps sync from extra jobs and livelock. 631 * This keeps sync from extra jobs and livelock.
650 */ 632 */
651 if (inode_dirtied_after(inode, start)) 633 if (inode_dirtied_after(inode, wbc->wb_start))
652 break; 634 return 1;
653
654 if (pin_sb_for_writeback(wbc, inode, &pin_sb)) {
655 requeue_io(inode);
656 continue;
657 }
658 635
659 BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); 636 BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
660 __iget(inode); 637 __iget(inode);
@@ -673,14 +650,50 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
673 spin_lock(&inode_lock); 650 spin_lock(&inode_lock);
674 if (wbc->nr_to_write <= 0) { 651 if (wbc->nr_to_write <= 0) {
675 wbc->more_io = 1; 652 wbc->more_io = 1;
676 break; 653 return 1;
677 } 654 }
678 if (!list_empty(&wb->b_more_io)) 655 if (!list_empty(&wb->b_more_io))
679 wbc->more_io = 1; 656 wbc->more_io = 1;
680 } 657 }
658 /* b_io is empty */
659 return 1;
660}
661
662static void writeback_inodes_wb(struct bdi_writeback *wb,
663 struct writeback_control *wbc)
664{
665 int ret = 0;
681 666
682 unpin_sb_for_writeback(&pin_sb); 667 wbc->wb_start = jiffies; /* livelock avoidance */
668 spin_lock(&inode_lock);
669 if (!wbc->for_kupdate || list_empty(&wb->b_io))
670 queue_io(wb, wbc->older_than_this);
671
672 while (!list_empty(&wb->b_io)) {
673 struct inode *inode = list_entry(wb->b_io.prev,
674 struct inode, i_list);
675 struct super_block *sb = inode->i_sb;
676 enum sb_pin_state state;
677
678 if (wbc->sb && sb != wbc->sb) {
679 /* super block given and doesn't
680 match, skip this inode */
681 redirty_tail(inode);
682 continue;
683 }
684 state = pin_sb_for_writeback(wbc, sb);
685
686 if (state == SB_PIN_FAILED) {
687 requeue_io(inode);
688 continue;
689 }
690 ret = writeback_sb_inodes(sb, wb, wbc);
683 691
692 if (state == SB_PINNED)
693 unpin_sb_for_writeback(sb);
694 if (ret)
695 break;
696 }
684 spin_unlock(&inode_lock); 697 spin_unlock(&inode_lock);
685 /* Leave any unwritten inodes on b_io */ 698 /* Leave any unwritten inodes on b_io */
686} 699}
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 76e8903cd204..36520ded3e06 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -34,6 +34,9 @@ struct writeback_control {
34 enum writeback_sync_modes sync_mode; 34 enum writeback_sync_modes sync_mode;
35 unsigned long *older_than_this; /* If !NULL, only write back inodes 35 unsigned long *older_than_this; /* If !NULL, only write back inodes
36 older than this */ 36 older than this */
37 unsigned long wb_start; /* Time writeback_inodes_wb was
38 called. This is needed to avoid
39 extra jobs and livelock */
37 long nr_to_write; /* Write this many pages, and decrement 40 long nr_to_write; /* Write this many pages, and decrement
38 this for each page written */ 41 this for each page written */
39 long pages_skipped; /* Pages which were not written */ 42 long pages_skipped; /* Pages which were not written */