diff options
author | Dave Chinner <david@fromorbit.com> | 2010-02-05 20:37:26 -0500 |
---|---|---|
committer | Dave Chinner <david@fromorbit.com> | 2010-02-05 20:37:26 -0500 |
commit | 777df5afdb26c71634edd60582be620ff94e87a0 (patch) | |
tree | 26ed86f1ec114250230e4e090be18980c94ce73f /fs/xfs | |
parent | d5db0f97fbbeff11c88dec1aaf1536a975afbaeb (diff) |
xfs: Make inode reclaim states explicit
A.K.A.: don't rely on xfs_iflush() return value in reclaim
We have gradually been moving checks out of the reclaim code because
they are duplicated in xfs_iflush(). We've had a history of problems
in this area, and many of them stem from the overloading of the
return values from xfs_iflush() and interaction with inode flush
locking to determine if the inode is safe to reclaim.
With the desire to move to delayed write flushing of inodes and
non-blocking inode tree reclaim walks, the overloading of the
return value of xfs_iflush makes it very difficult to determine
the correct thing to do next.
This patch explicitly re-adds the checks to the inode reclaim code,
removing the reliance on the return value of xfs_iflush() to
determine what to do next. It also means that we can clearly
document all the inode states that reclaim must handle and hence
we can easily see that we handled all the necessary cases.
This also removes the need for the xfs_inode_clean() check in
xfs_iflush() as all callers now check this first (safely).
Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'fs/xfs')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.c | 81 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 11 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 1 |
3 files changed, 64 insertions, 29 deletions
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index c9b863eacab7..525260c7617f 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -706,12 +706,43 @@ __xfs_inode_clear_reclaim_tag( | |||
706 | XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); | 706 | XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); |
707 | } | 707 | } |
708 | 708 | ||
709 | /* | ||
710 | * Inodes in different states need to be treated differently, and the return | ||
711 | * value of xfs_iflush is not sufficient to get this right. The following table | ||
712 | * lists the inode states and the reclaim actions necessary for non-blocking | ||
713 | * reclaim: | ||
714 | * | ||
715 | * | ||
716 | * inode state iflush ret required action | ||
717 | * --------------- ---------- --------------- | ||
718 | * bad - reclaim | ||
719 | * shutdown EIO unpin and reclaim | ||
720 | * clean, unpinned 0 reclaim | ||
721 | * stale, unpinned 0 reclaim | ||
722 | * clean, pinned(*) 0 unpin and reclaim | ||
723 | * stale, pinned 0 unpin and reclaim | ||
724 | * dirty, async 0 block on flush lock, reclaim | ||
725 | * dirty, sync flush 0 block on flush lock, reclaim | ||
726 | * | ||
727 | * (*) dgc: I don't think the clean, pinned state is possible but it gets | ||
728 | * handled anyway given the order of checks implemented. | ||
729 | * | ||
730 | * Hence the order of actions after gaining the locks should be: | ||
731 | * bad => reclaim | ||
732 | * shutdown => unpin and reclaim | ||
733 | * pinned => unpin | ||
734 | * stale => reclaim | ||
735 | * clean => reclaim | ||
736 | * dirty => flush, wait and reclaim | ||
737 | */ | ||
709 | STATIC int | 738 | STATIC int |
710 | xfs_reclaim_inode( | 739 | xfs_reclaim_inode( |
711 | struct xfs_inode *ip, | 740 | struct xfs_inode *ip, |
712 | struct xfs_perag *pag, | 741 | struct xfs_perag *pag, |
713 | int sync_mode) | 742 | int sync_mode) |
714 | { | 743 | { |
744 | int error; | ||
745 | |||
715 | /* | 746 | /* |
716 | * The radix tree lock here protects a thread in xfs_iget from racing | 747 | * The radix tree lock here protects a thread in xfs_iget from racing |
717 | * with us starting reclaim on the inode. Once we have the | 748 | * with us starting reclaim on the inode. Once we have the |
@@ -729,30 +760,42 @@ xfs_reclaim_inode( | |||
729 | spin_unlock(&ip->i_flags_lock); | 760 | spin_unlock(&ip->i_flags_lock); |
730 | write_unlock(&pag->pag_ici_lock); | 761 | write_unlock(&pag->pag_ici_lock); |
731 | 762 | ||
732 | /* | ||
733 | * If the inode is still dirty, then flush it out. If the inode | ||
734 | * is not in the AIL, then it will be OK to flush it delwri as | ||
735 | * long as xfs_iflush() does not keep any references to the inode. | ||
736 | * We leave that decision up to xfs_iflush() since it has the | ||
737 | * knowledge of whether it's OK to simply do a delwri flush of | ||
738 | * the inode or whether we need to wait until the inode is | ||
739 | * pulled from the AIL. | ||
740 | * We get the flush lock regardless, though, just to make sure | ||
741 | * we don't free it while it is being flushed. | ||
742 | */ | ||
743 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 763 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
744 | xfs_iflock(ip); | 764 | xfs_iflock(ip); |
745 | 765 | ||
746 | /* | 766 | if (is_bad_inode(VFS_I(ip))) |
747 | * In the case of a forced shutdown we rely on xfs_iflush() to | 767 | goto reclaim; |
748 | * wait for the inode to be unpinned before returning an error. | 768 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
749 | */ | 769 | xfs_iunpin_wait(ip); |
750 | if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) { | 770 | goto reclaim; |
751 | /* synchronize with xfs_iflush_done */ | 771 | } |
752 | xfs_iflock(ip); | 772 | if (xfs_ipincount(ip)) |
753 | xfs_ifunlock(ip); | 773 | xfs_iunpin_wait(ip); |
774 | if (xfs_iflags_test(ip, XFS_ISTALE)) | ||
775 | goto reclaim; | ||
776 | if (xfs_inode_clean(ip)) | ||
777 | goto reclaim; | ||
778 | |||
779 | /* Now we have an inode that needs flushing */ | ||
780 | error = xfs_iflush(ip, sync_mode); | ||
781 | if (!error) { | ||
782 | switch(sync_mode) { | ||
783 | case XFS_IFLUSH_DELWRI_ELSE_ASYNC: | ||
784 | case XFS_IFLUSH_DELWRI: | ||
785 | case XFS_IFLUSH_ASYNC: | ||
786 | case XFS_IFLUSH_DELWRI_ELSE_SYNC: | ||
787 | case XFS_IFLUSH_SYNC: | ||
788 | /* IO issued, synchronise with IO completion */ | ||
789 | xfs_iflock(ip); | ||
790 | break; | ||
791 | default: | ||
792 | ASSERT(0); | ||
793 | break; | ||
794 | } | ||
754 | } | 795 | } |
755 | 796 | ||
797 | reclaim: | ||
798 | xfs_ifunlock(ip); | ||
756 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 799 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
757 | xfs_ireclaim(ip); | 800 | xfs_ireclaim(ip); |
758 | return 0; | 801 | return 0; |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d0d1b5a05183..8d0666dd170a 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -2493,7 +2493,7 @@ __xfs_iunpin_wait( | |||
2493 | wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); | 2493 | wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); |
2494 | } | 2494 | } |
2495 | 2495 | ||
2496 | static inline void | 2496 | void |
2497 | xfs_iunpin_wait( | 2497 | xfs_iunpin_wait( |
2498 | xfs_inode_t *ip) | 2498 | xfs_inode_t *ip) |
2499 | { | 2499 | { |
@@ -2849,15 +2849,6 @@ xfs_iflush( | |||
2849 | mp = ip->i_mount; | 2849 | mp = ip->i_mount; |
2850 | 2850 | ||
2851 | /* | 2851 | /* |
2852 | * If the inode isn't dirty, then just release the inode flush lock and | ||
2853 | * do nothing. | ||
2854 | */ | ||
2855 | if (xfs_inode_clean(ip)) { | ||
2856 | xfs_ifunlock(ip); | ||
2857 | return 0; | ||
2858 | } | ||
2859 | |||
2860 | /* | ||
2861 | * We can't flush the inode until it is unpinned, so wait for it if we | 2852 | * We can't flush the inode until it is unpinned, so wait for it if we |
2862 | * are allowed to block. We know noone new can pin it, because we are | 2853 | * are allowed to block. We know noone new can pin it, because we are |
2863 | * holding the inode lock shared and you need to hold it exclusively to | 2854 | * holding the inode lock shared and you need to hold it exclusively to |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index ec1f28c4fc4f..8b618ea4d692 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -483,6 +483,7 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); | |||
483 | void xfs_iext_realloc(xfs_inode_t *, int, int); | 483 | void xfs_iext_realloc(xfs_inode_t *, int, int); |
484 | void xfs_ipin(xfs_inode_t *); | 484 | void xfs_ipin(xfs_inode_t *); |
485 | void xfs_iunpin(xfs_inode_t *); | 485 | void xfs_iunpin(xfs_inode_t *); |
486 | void xfs_iunpin_wait(xfs_inode_t *); | ||
486 | int xfs_iflush(xfs_inode_t *, uint); | 487 | int xfs_iflush(xfs_inode_t *, uint); |
487 | void xfs_ichgtime(xfs_inode_t *, int); | 488 | void xfs_ichgtime(xfs_inode_t *, int); |
488 | void xfs_lock_inodes(xfs_inode_t **, int, uint); | 489 | void xfs_lock_inodes(xfs_inode_t **, int, uint); |