diff options
author | Christoph Hellwig <hch@infradead.org> | 2012-04-23 01:58:35 -0400 |
---|---|---|
committer | Ben Myers <bpm@sgi.com> | 2012-05-14 17:20:28 -0400 |
commit | 8a48088f6439249019b5e17f6391e710656879d9 (patch) | |
tree | b898076f1f2245dc435fa8e64b1ddac2b8424c7e /fs/xfs | |
parent | 211e4d434bd737be38aabad0247ce3da9964370e (diff) |
xfs: don't flush inodes from background inode reclaim
We already flush dirty inodes throug the AIL regularly, there is no reason
to have second thread compete with it and disturb the I/O pattern. We still
do write inodes when doing a synchronous reclaim from the shrinker or during
unmount for now.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r-- | fs/xfs/xfs_sync.c | 102 |
1 files changed, 42 insertions, 60 deletions
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c index 85d03e6a2677..7b2bccc4d67b 100644 --- a/fs/xfs/xfs_sync.c +++ b/fs/xfs/xfs_sync.c | |||
@@ -631,11 +631,8 @@ xfs_reclaim_inode_grab( | |||
631 | } | 631 | } |
632 | 632 | ||
633 | /* | 633 | /* |
634 | * Inodes in different states need to be treated differently, and the return | 634 | * Inodes in different states need to be treated differently. The following |
635 | * value of xfs_iflush is not sufficient to get this right. The following table | 635 | * table lists the inode states and the reclaim actions necessary: |
636 | * lists the inode states and the reclaim actions necessary for non-blocking | ||
637 | * reclaim: | ||
638 | * | ||
639 | * | 636 | * |
640 | * inode state iflush ret required action | 637 | * inode state iflush ret required action |
641 | * --------------- ---------- --------------- | 638 | * --------------- ---------- --------------- |
@@ -645,9 +642,8 @@ xfs_reclaim_inode_grab( | |||
645 | * stale, unpinned 0 reclaim | 642 | * stale, unpinned 0 reclaim |
646 | * clean, pinned(*) 0 requeue | 643 | * clean, pinned(*) 0 requeue |
647 | * stale, pinned EAGAIN requeue | 644 | * stale, pinned EAGAIN requeue |
648 | * dirty, delwri ok 0 requeue | 645 | * dirty, async - requeue |
649 | * dirty, delwri blocked EAGAIN requeue | 646 | * dirty, sync 0 reclaim |
650 | * dirty, sync flush 0 reclaim | ||
651 | * | 647 | * |
652 | * (*) dgc: I don't think the clean, pinned state is possible but it gets | 648 | * (*) dgc: I don't think the clean, pinned state is possible but it gets |
653 | * handled anyway given the order of checks implemented. | 649 | * handled anyway given the order of checks implemented. |
@@ -658,26 +654,23 @@ xfs_reclaim_inode_grab( | |||
658 | * | 654 | * |
659 | * Also, because we get the flush lock first, we know that any inode that has | 655 | * Also, because we get the flush lock first, we know that any inode that has |
660 | * been flushed delwri has had the flush completed by the time we check that | 656 | * been flushed delwri has had the flush completed by the time we check that |
661 | * the inode is clean. The clean inode check needs to be done before flushing | 657 | * the inode is clean. |
662 | * the inode delwri otherwise we would loop forever requeuing clean inodes as | ||
663 | * we cannot tell apart a successful delwri flush and a clean inode from the | ||
664 | * return value of xfs_iflush(). | ||
665 | * | 658 | * |
666 | * Note that because the inode is flushed delayed write by background | 659 | * Note that because the inode is flushed delayed write by AIL pushing, the |
667 | * writeback, the flush lock may already be held here and waiting on it can | 660 | * flush lock may already be held here and waiting on it can result in very |
668 | * result in very long latencies. Hence for sync reclaims, where we wait on the | 661 | * long latencies. Hence for sync reclaims, where we wait on the flush lock, |
669 | * flush lock, the caller should push out delayed write inodes first before | 662 | * the caller should push the AIL first before trying to reclaim inodes to |
670 | * trying to reclaim them to minimise the amount of time spent waiting. For | 663 | * minimise the amount of time spent waiting. For background relaim, we only |
671 | * background relaim, we just requeue the inode for the next pass. | 664 | * bother to reclaim clean inodes anyway. |
672 | * | 665 | * |
673 | * Hence the order of actions after gaining the locks should be: | 666 | * Hence the order of actions after gaining the locks should be: |
674 | * bad => reclaim | 667 | * bad => reclaim |
675 | * shutdown => unpin and reclaim | 668 | * shutdown => unpin and reclaim |
676 | * pinned, delwri => requeue | 669 | * pinned, async => requeue |
677 | * pinned, sync => unpin | 670 | * pinned, sync => unpin |
678 | * stale => reclaim | 671 | * stale => reclaim |
679 | * clean => reclaim | 672 | * clean => reclaim |
680 | * dirty, delwri => flush and requeue | 673 | * dirty, async => requeue |
681 | * dirty, sync => flush, wait and reclaim | 674 | * dirty, sync => flush, wait and reclaim |
682 | */ | 675 | */ |
683 | STATIC int | 676 | STATIC int |
@@ -716,10 +709,8 @@ restart: | |||
716 | goto reclaim; | 709 | goto reclaim; |
717 | } | 710 | } |
718 | if (xfs_ipincount(ip)) { | 711 | if (xfs_ipincount(ip)) { |
719 | if (!(sync_mode & SYNC_WAIT)) { | 712 | if (!(sync_mode & SYNC_WAIT)) |
720 | xfs_ifunlock(ip); | 713 | goto out_ifunlock; |
721 | goto out; | ||
722 | } | ||
723 | xfs_iunpin_wait(ip); | 714 | xfs_iunpin_wait(ip); |
724 | } | 715 | } |
725 | if (xfs_iflags_test(ip, XFS_ISTALE)) | 716 | if (xfs_iflags_test(ip, XFS_ISTALE)) |
@@ -728,6 +719,13 @@ restart: | |||
728 | goto reclaim; | 719 | goto reclaim; |
729 | 720 | ||
730 | /* | 721 | /* |
722 | * Never flush out dirty data during non-blocking reclaim, as it would | ||
723 | * just contend with AIL pushing trying to do the same job. | ||
724 | */ | ||
725 | if (!(sync_mode & SYNC_WAIT)) | ||
726 | goto out_ifunlock; | ||
727 | |||
728 | /* | ||
731 | * Now we have an inode that needs flushing. | 729 | * Now we have an inode that needs flushing. |
732 | * | 730 | * |
733 | * We do a nonblocking flush here even if we are doing a SYNC_WAIT | 731 | * We do a nonblocking flush here even if we are doing a SYNC_WAIT |
@@ -745,42 +743,13 @@ restart: | |||
745 | * pass through will see the stale flag set on the inode. | 743 | * pass through will see the stale flag set on the inode. |
746 | */ | 744 | */ |
747 | error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode); | 745 | error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode); |
748 | if (sync_mode & SYNC_WAIT) { | 746 | if (error == EAGAIN) { |
749 | if (error == EAGAIN) { | 747 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
750 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 748 | /* backoff longer than in xfs_ifree_cluster */ |
751 | /* backoff longer than in xfs_ifree_cluster */ | 749 | delay(2); |
752 | delay(2); | 750 | goto restart; |
753 | goto restart; | ||
754 | } | ||
755 | xfs_iflock(ip); | ||
756 | goto reclaim; | ||
757 | } | ||
758 | |||
759 | /* | ||
760 | * When we have to flush an inode but don't have SYNC_WAIT set, we | ||
761 | * flush the inode out using a delwri buffer and wait for the next | ||
762 | * call into reclaim to find it in a clean state instead of waiting for | ||
763 | * it now. We also don't return errors here - if the error is transient | ||
764 | * then the next reclaim pass will flush the inode, and if the error | ||
765 | * is permanent then the next sync reclaim will reclaim the inode and | ||
766 | * pass on the error. | ||
767 | */ | ||
768 | if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { | ||
769 | xfs_warn(ip->i_mount, | ||
770 | "inode 0x%llx background reclaim flush failed with %d", | ||
771 | (long long)ip->i_ino, error); | ||
772 | } | 751 | } |
773 | out: | 752 | xfs_iflock(ip); |
774 | xfs_iflags_clear(ip, XFS_IRECLAIM); | ||
775 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
776 | /* | ||
777 | * We could return EAGAIN here to make reclaim rescan the inode tree in | ||
778 | * a short while. However, this just burns CPU time scanning the tree | ||
779 | * waiting for IO to complete and xfssyncd never goes back to the idle | ||
780 | * state. Instead, return 0 to let the next scheduled background reclaim | ||
781 | * attempt to reclaim the inode again. | ||
782 | */ | ||
783 | return 0; | ||
784 | 753 | ||
785 | reclaim: | 754 | reclaim: |
786 | xfs_ifunlock(ip); | 755 | xfs_ifunlock(ip); |
@@ -814,8 +783,21 @@ reclaim: | |||
814 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 783 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
815 | 784 | ||
816 | xfs_inode_free(ip); | 785 | xfs_inode_free(ip); |
817 | |||
818 | return error; | 786 | return error; |
787 | |||
788 | out_ifunlock: | ||
789 | xfs_ifunlock(ip); | ||
790 | out: | ||
791 | xfs_iflags_clear(ip, XFS_IRECLAIM); | ||
792 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
793 | /* | ||
794 | * We could return EAGAIN here to make reclaim rescan the inode tree in | ||
795 | * a short while. However, this just burns CPU time scanning the tree | ||
796 | * waiting for IO to complete and xfssyncd never goes back to the idle | ||
797 | * state. Instead, return 0 to let the next scheduled background reclaim | ||
798 | * attempt to reclaim the inode again. | ||
799 | */ | ||
800 | return 0; | ||
819 | } | 801 | } |
820 | 802 | ||
821 | /* | 803 | /* |