aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@infradead.org>2012-04-23 01:58:35 -0400
committerBen Myers <bpm@sgi.com>2012-05-14 17:20:28 -0400
commit8a48088f6439249019b5e17f6391e710656879d9 (patch)
treeb898076f1f2245dc435fa8e64b1ddac2b8424c7e /fs/xfs
parent211e4d434bd737be38aabad0247ce3da9964370e (diff)
xfs: don't flush inodes from background inode reclaim
We already flush dirty inodes throug the AIL regularly, there is no reason to have second thread compete with it and disturb the I/O pattern. We still do write inodes when doing a synchronous reclaim from the shrinker or during unmount for now. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Mark Tinguely <tinguely@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/xfs_sync.c102
1 files changed, 42 insertions, 60 deletions
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 85d03e6a2677..7b2bccc4d67b 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -631,11 +631,8 @@ xfs_reclaim_inode_grab(
631} 631}
632 632
633/* 633/*
634 * Inodes in different states need to be treated differently, and the return 634 * Inodes in different states need to be treated differently. The following
635 * value of xfs_iflush is not sufficient to get this right. The following table 635 * table lists the inode states and the reclaim actions necessary:
636 * lists the inode states and the reclaim actions necessary for non-blocking
637 * reclaim:
638 *
639 * 636 *
640 * inode state iflush ret required action 637 * inode state iflush ret required action
641 * --------------- ---------- --------------- 638 * --------------- ---------- ---------------
@@ -645,9 +642,8 @@ xfs_reclaim_inode_grab(
645 * stale, unpinned 0 reclaim 642 * stale, unpinned 0 reclaim
646 * clean, pinned(*) 0 requeue 643 * clean, pinned(*) 0 requeue
647 * stale, pinned EAGAIN requeue 644 * stale, pinned EAGAIN requeue
648 * dirty, delwri ok 0 requeue 645 * dirty, async - requeue
649 * dirty, delwri blocked EAGAIN requeue 646 * dirty, sync 0 reclaim
650 * dirty, sync flush 0 reclaim
651 * 647 *
652 * (*) dgc: I don't think the clean, pinned state is possible but it gets 648 * (*) dgc: I don't think the clean, pinned state is possible but it gets
653 * handled anyway given the order of checks implemented. 649 * handled anyway given the order of checks implemented.
@@ -658,26 +654,23 @@ xfs_reclaim_inode_grab(
658 * 654 *
659 * Also, because we get the flush lock first, we know that any inode that has 655 * Also, because we get the flush lock first, we know that any inode that has
660 * been flushed delwri has had the flush completed by the time we check that 656 * been flushed delwri has had the flush completed by the time we check that
661 * the inode is clean. The clean inode check needs to be done before flushing 657 * the inode is clean.
662 * the inode delwri otherwise we would loop forever requeuing clean inodes as
663 * we cannot tell apart a successful delwri flush and a clean inode from the
664 * return value of xfs_iflush().
665 * 658 *
666 * Note that because the inode is flushed delayed write by background 659 * Note that because the inode is flushed delayed write by AIL pushing, the
667 * writeback, the flush lock may already be held here and waiting on it can 660 * flush lock may already be held here and waiting on it can result in very
668 * result in very long latencies. Hence for sync reclaims, where we wait on the 661 * long latencies. Hence for sync reclaims, where we wait on the flush lock,
669 * flush lock, the caller should push out delayed write inodes first before 662 * the caller should push the AIL first before trying to reclaim inodes to
670 * trying to reclaim them to minimise the amount of time spent waiting. For 663 * minimise the amount of time spent waiting. For background relaim, we only
671 * background relaim, we just requeue the inode for the next pass. 664 * bother to reclaim clean inodes anyway.
672 * 665 *
673 * Hence the order of actions after gaining the locks should be: 666 * Hence the order of actions after gaining the locks should be:
674 * bad => reclaim 667 * bad => reclaim
675 * shutdown => unpin and reclaim 668 * shutdown => unpin and reclaim
676 * pinned, delwri => requeue 669 * pinned, async => requeue
677 * pinned, sync => unpin 670 * pinned, sync => unpin
678 * stale => reclaim 671 * stale => reclaim
679 * clean => reclaim 672 * clean => reclaim
680 * dirty, delwri => flush and requeue 673 * dirty, async => requeue
681 * dirty, sync => flush, wait and reclaim 674 * dirty, sync => flush, wait and reclaim
682 */ 675 */
683STATIC int 676STATIC int
@@ -716,10 +709,8 @@ restart:
716 goto reclaim; 709 goto reclaim;
717 } 710 }
718 if (xfs_ipincount(ip)) { 711 if (xfs_ipincount(ip)) {
719 if (!(sync_mode & SYNC_WAIT)) { 712 if (!(sync_mode & SYNC_WAIT))
720 xfs_ifunlock(ip); 713 goto out_ifunlock;
721 goto out;
722 }
723 xfs_iunpin_wait(ip); 714 xfs_iunpin_wait(ip);
724 } 715 }
725 if (xfs_iflags_test(ip, XFS_ISTALE)) 716 if (xfs_iflags_test(ip, XFS_ISTALE))
@@ -728,6 +719,13 @@ restart:
728 goto reclaim; 719 goto reclaim;
729 720
730 /* 721 /*
722 * Never flush out dirty data during non-blocking reclaim, as it would
723 * just contend with AIL pushing trying to do the same job.
724 */
725 if (!(sync_mode & SYNC_WAIT))
726 goto out_ifunlock;
727
728 /*
731 * Now we have an inode that needs flushing. 729 * Now we have an inode that needs flushing.
732 * 730 *
733 * We do a nonblocking flush here even if we are doing a SYNC_WAIT 731 * We do a nonblocking flush here even if we are doing a SYNC_WAIT
@@ -745,42 +743,13 @@ restart:
745 * pass through will see the stale flag set on the inode. 743 * pass through will see the stale flag set on the inode.
746 */ 744 */
747 error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode); 745 error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
748 if (sync_mode & SYNC_WAIT) { 746 if (error == EAGAIN) {
749 if (error == EAGAIN) { 747 xfs_iunlock(ip, XFS_ILOCK_EXCL);
750 xfs_iunlock(ip, XFS_ILOCK_EXCL); 748 /* backoff longer than in xfs_ifree_cluster */
751 /* backoff longer than in xfs_ifree_cluster */ 749 delay(2);
752 delay(2); 750 goto restart;
753 goto restart;
754 }
755 xfs_iflock(ip);
756 goto reclaim;
757 }
758
759 /*
760 * When we have to flush an inode but don't have SYNC_WAIT set, we
761 * flush the inode out using a delwri buffer and wait for the next
762 * call into reclaim to find it in a clean state instead of waiting for
763 * it now. We also don't return errors here - if the error is transient
764 * then the next reclaim pass will flush the inode, and if the error
765 * is permanent then the next sync reclaim will reclaim the inode and
766 * pass on the error.
767 */
768 if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
769 xfs_warn(ip->i_mount,
770 "inode 0x%llx background reclaim flush failed with %d",
771 (long long)ip->i_ino, error);
772 } 751 }
773out: 752 xfs_iflock(ip);
774 xfs_iflags_clear(ip, XFS_IRECLAIM);
775 xfs_iunlock(ip, XFS_ILOCK_EXCL);
776 /*
777 * We could return EAGAIN here to make reclaim rescan the inode tree in
778 * a short while. However, this just burns CPU time scanning the tree
779 * waiting for IO to complete and xfssyncd never goes back to the idle
780 * state. Instead, return 0 to let the next scheduled background reclaim
781 * attempt to reclaim the inode again.
782 */
783 return 0;
784 753
785reclaim: 754reclaim:
786 xfs_ifunlock(ip); 755 xfs_ifunlock(ip);
@@ -814,8 +783,21 @@ reclaim:
814 xfs_iunlock(ip, XFS_ILOCK_EXCL); 783 xfs_iunlock(ip, XFS_ILOCK_EXCL);
815 784
816 xfs_inode_free(ip); 785 xfs_inode_free(ip);
817
818 return error; 786 return error;
787
788out_ifunlock:
789 xfs_ifunlock(ip);
790out:
791 xfs_iflags_clear(ip, XFS_IRECLAIM);
792 xfs_iunlock(ip, XFS_ILOCK_EXCL);
793 /*
794 * We could return EAGAIN here to make reclaim rescan the inode tree in
795 * a short while. However, this just burns CPU time scanning the tree
796 * waiting for IO to complete and xfssyncd never goes back to the idle
797 * state. Instead, return 0 to let the next scheduled background reclaim
798 * attempt to reclaim the inode again.
799 */
800 return 0;
819} 801}
820 802
821/* 803/*