diff options
author | Edward Shishkin <edward.shishkin@gmail.com> | 2010-03-11 17:09:47 -0500 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2010-03-12 04:03:42 -0500 |
commit | f11c9c5c259cb2c3d698548dc3936f773ab1f5b9 (patch) | |
tree | c6461c9d6981122e1507dafa0394901903eb2ca1 | |
parent | c12ec0a2d94001003dfb929ce14c287fca0522b0 (diff) |
vfs: improve writeback_inodes_wb()
Do not pin/unpin the superblock for every inode in writeback_inodes_wb().
Instead, pin it once for the whole group of inodes that belong to the same
superblock and call the writeback_sb_inodes() handler for that group.
Signed-off-by: Edward Shishkin <edward.shishkin@gmail.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r-- | fs/fs-writeback.c | 133 | ||||
-rw-r--r-- | include/linux/writeback.h | 3 |
2 files changed, 76 insertions, 60 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 76fc4d594acb..6841effa47ca 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -553,108 +553,85 @@ select_queue: | |||
553 | return ret; | 553 | return ret; |
554 | } | 554 | } |
555 | 555 | ||
556 | static void unpin_sb_for_writeback(struct super_block **psb) | 556 | static void unpin_sb_for_writeback(struct super_block *sb) |
557 | { | 557 | { |
558 | struct super_block *sb = *psb; | 558 | up_read(&sb->s_umount); |
559 | 559 | put_super(sb); | |
560 | if (sb) { | ||
561 | up_read(&sb->s_umount); | ||
562 | put_super(sb); | ||
563 | *psb = NULL; | ||
564 | } | ||
565 | } | 560 | } |
566 | 561 | ||
562 | enum sb_pin_state { | ||
563 | SB_PINNED, | ||
564 | SB_NOT_PINNED, | ||
565 | SB_PIN_FAILED | ||
566 | }; | ||
567 | |||
567 | /* | 568 | /* |
568 | * For WB_SYNC_NONE writeback, the caller does not have the sb pinned | 569 | * For WB_SYNC_NONE writeback, the caller does not have the sb pinned |
569 | * before calling writeback. So make sure that we do pin it, so it doesn't | 570 | * before calling writeback. So make sure that we do pin it, so it doesn't |
570 | * go away while we are writing inodes from it. | 571 | * go away while we are writing inodes from it. |
571 | * | ||
572 | * Returns 0 if the super was successfully pinned (or pinning wasn't needed), | ||
573 | * 1 if we failed. | ||
574 | */ | 572 | */ |
575 | static int pin_sb_for_writeback(struct writeback_control *wbc, | 573 | static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc, |
576 | struct inode *inode, struct super_block **psb) | 574 | struct super_block *sb) |
577 | { | 575 | { |
578 | struct super_block *sb = inode->i_sb; | ||
579 | |||
580 | /* | ||
581 | * If this sb is already pinned, nothing more to do. If not and | ||
582 | * *psb is non-NULL, unpin the old one first | ||
583 | */ | ||
584 | if (sb == *psb) | ||
585 | return 0; | ||
586 | else if (*psb) | ||
587 | unpin_sb_for_writeback(psb); | ||
588 | |||
589 | /* | 576 | /* |
590 | * Caller must already hold the ref for this | 577 | * Caller must already hold the ref for this |
591 | */ | 578 | */ |
592 | if (wbc->sync_mode == WB_SYNC_ALL) { | 579 | if (wbc->sync_mode == WB_SYNC_ALL) { |
593 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 580 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
594 | return 0; | 581 | return SB_NOT_PINNED; |
595 | } | 582 | } |
596 | |||
597 | spin_lock(&sb_lock); | 583 | spin_lock(&sb_lock); |
598 | sb->s_count++; | 584 | sb->s_count++; |
599 | if (down_read_trylock(&sb->s_umount)) { | 585 | if (down_read_trylock(&sb->s_umount)) { |
600 | if (sb->s_root) { | 586 | if (sb->s_root) { |
601 | spin_unlock(&sb_lock); | 587 | spin_unlock(&sb_lock); |
602 | goto pinned; | 588 | return SB_PINNED; |
603 | } | 589 | } |
604 | /* | 590 | /* |
605 | * umounted, drop rwsem again and fall through to failure | 591 | * umounted, drop rwsem again and fall through to failure |
606 | */ | 592 | */ |
607 | up_read(&sb->s_umount); | 593 | up_read(&sb->s_umount); |
608 | } | 594 | } |
609 | |||
610 | sb->s_count--; | 595 | sb->s_count--; |
611 | spin_unlock(&sb_lock); | 596 | spin_unlock(&sb_lock); |
612 | return 1; | 597 | return SB_PIN_FAILED; |
613 | pinned: | ||
614 | *psb = sb; | ||
615 | return 0; | ||
616 | } | 598 | } |
617 | 599 | ||
618 | static void writeback_inodes_wb(struct bdi_writeback *wb, | 600 | /* |
619 | struct writeback_control *wbc) | 601 | * Write a portion of b_io inodes which belong to @sb. |
602 | * If @wbc->sb != NULL, then find and write all such | ||
603 | * inodes. Otherwise write only ones which go sequentially | ||
604 | * in reverse order. | ||
605 | * Return 1, if the caller writeback routine should be | ||
606 | * interrupted. Otherwise return 0. | ||
607 | */ | ||
608 | static int writeback_sb_inodes(struct super_block *sb, | ||
609 | struct bdi_writeback *wb, | ||
610 | struct writeback_control *wbc) | ||
620 | { | 611 | { |
621 | struct super_block *sb = wbc->sb, *pin_sb = NULL; | ||
622 | const unsigned long start = jiffies; /* livelock avoidance */ | ||
623 | |||
624 | spin_lock(&inode_lock); | ||
625 | |||
626 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | ||
627 | queue_io(wb, wbc->older_than_this); | ||
628 | |||
629 | while (!list_empty(&wb->b_io)) { | 612 | while (!list_empty(&wb->b_io)) { |
630 | struct inode *inode = list_entry(wb->b_io.prev, | ||
631 | struct inode, i_list); | ||
632 | long pages_skipped; | 613 | long pages_skipped; |
633 | 614 | struct inode *inode = list_entry(wb->b_io.prev, | |
634 | /* | 615 | struct inode, i_list); |
635 | * super block given and doesn't match, skip this inode | 616 | if (wbc->sb && sb != inode->i_sb) { |
636 | */ | 617 | /* super block given and doesn't |
637 | if (sb && sb != inode->i_sb) { | 618 | match, skip this inode */ |
638 | redirty_tail(inode); | 619 | redirty_tail(inode); |
639 | continue; | 620 | continue; |
640 | } | 621 | } |
641 | 622 | if (sb != inode->i_sb) | |
623 | /* finish with this superblock */ | ||
624 | return 0; | ||
642 | if (inode->i_state & (I_NEW | I_WILL_FREE)) { | 625 | if (inode->i_state & (I_NEW | I_WILL_FREE)) { |
643 | requeue_io(inode); | 626 | requeue_io(inode); |
644 | continue; | 627 | continue; |
645 | } | 628 | } |
646 | |||
647 | /* | 629 | /* |
648 | * Was this inode dirtied after sync_sb_inodes was called? | 630 | * Was this inode dirtied after sync_sb_inodes was called? |
649 | * This keeps sync from extra jobs and livelock. | 631 | * This keeps sync from extra jobs and livelock. |
650 | */ | 632 | */ |
651 | if (inode_dirtied_after(inode, start)) | 633 | if (inode_dirtied_after(inode, wbc->wb_start)) |
652 | break; | 634 | return 1; |
653 | |||
654 | if (pin_sb_for_writeback(wbc, inode, &pin_sb)) { | ||
655 | requeue_io(inode); | ||
656 | continue; | ||
657 | } | ||
658 | 635 | ||
659 | BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); | 636 | BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); |
660 | __iget(inode); | 637 | __iget(inode); |
@@ -673,14 +650,50 @@ static void writeback_inodes_wb(struct bdi_writeback *wb, | |||
673 | spin_lock(&inode_lock); | 650 | spin_lock(&inode_lock); |
674 | if (wbc->nr_to_write <= 0) { | 651 | if (wbc->nr_to_write <= 0) { |
675 | wbc->more_io = 1; | 652 | wbc->more_io = 1; |
676 | break; | 653 | return 1; |
677 | } | 654 | } |
678 | if (!list_empty(&wb->b_more_io)) | 655 | if (!list_empty(&wb->b_more_io)) |
679 | wbc->more_io = 1; | 656 | wbc->more_io = 1; |
680 | } | 657 | } |
658 | /* b_io is empty */ | ||
659 | return 1; | ||
660 | } | ||
661 | |||
662 | static void writeback_inodes_wb(struct bdi_writeback *wb, | ||
663 | struct writeback_control *wbc) | ||
664 | { | ||
665 | int ret = 0; | ||
681 | 666 | ||
682 | unpin_sb_for_writeback(&pin_sb); | 667 | wbc->wb_start = jiffies; /* livelock avoidance */ |
668 | spin_lock(&inode_lock); | ||
669 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | ||
670 | queue_io(wb, wbc->older_than_this); | ||
671 | |||
672 | while (!list_empty(&wb->b_io)) { | ||
673 | struct inode *inode = list_entry(wb->b_io.prev, | ||
674 | struct inode, i_list); | ||
675 | struct super_block *sb = inode->i_sb; | ||
676 | enum sb_pin_state state; | ||
677 | |||
678 | if (wbc->sb && sb != wbc->sb) { | ||
679 | /* super block given and doesn't | ||
680 | match, skip this inode */ | ||
681 | redirty_tail(inode); | ||
682 | continue; | ||
683 | } | ||
684 | state = pin_sb_for_writeback(wbc, sb); | ||
685 | |||
686 | if (state == SB_PIN_FAILED) { | ||
687 | requeue_io(inode); | ||
688 | continue; | ||
689 | } | ||
690 | ret = writeback_sb_inodes(sb, wb, wbc); | ||
683 | 691 | ||
692 | if (state == SB_PINNED) | ||
693 | unpin_sb_for_writeback(sb); | ||
694 | if (ret) | ||
695 | break; | ||
696 | } | ||
684 | spin_unlock(&inode_lock); | 697 | spin_unlock(&inode_lock); |
685 | /* Leave any unwritten inodes on b_io */ | 698 | /* Leave any unwritten inodes on b_io */ |
686 | } | 699 | } |
diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 76e8903cd204..36520ded3e06 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h | |||
@@ -34,6 +34,9 @@ struct writeback_control { | |||
34 | enum writeback_sync_modes sync_mode; | 34 | enum writeback_sync_modes sync_mode; |
35 | unsigned long *older_than_this; /* If !NULL, only write back inodes | 35 | unsigned long *older_than_this; /* If !NULL, only write back inodes |
36 | older than this */ | 36 | older than this */ |
37 | unsigned long wb_start; /* Time writeback_inodes_wb was | ||
38 | called. This is needed to avoid | ||
39 | extra jobs and livelock */ | ||
37 | long nr_to_write; /* Write this many pages, and decrement | 40 | long nr_to_write; /* Write this many pages, and decrement |
38 | this for each page written */ | 41 | this for each page written */ |
39 | long pages_skipped; /* Pages which were not written */ | 42 | long pages_skipped; /* Pages which were not written */ |