diff options
author | David Chinner <david@fromorbit.com> | 2008-10-30 03:03:14 -0400 |
---|---|---|
committer | Lachlan McIlroy <lachlan@redback.melbourne.sgi.com> | 2008-10-30 03:03:14 -0400 |
commit | 455486b9ccdd0a1d7432a03302f549b1c917c181 (patch) | |
tree | b8f9068c75cb627341ccde4856170f7689b6bab2 | |
parent | 56e73ec47d749047f441e6b9d60d964535d31c3b (diff) |
[XFS] avoid all reclaimable inodes in xfs_sync_inodes_ag
If we are syncing data in xfs_sync_inodes_ag(), the VFS inode must still
be referenceable, as the dirty data state is carried on the VFS inode. Hence,
if we can't get a reference via igrab(), the inode must be in reclaim,
which implies that it has no dirty data attached.
Leave such inodes to the reclaim code, which will flush the dirty inode
state to disk, so that xfs_sync_inodes_ag() never attempts to access a VFS
inode that may no longer exist.
Version 4:
o don't reference linux inode until after igrab() succeeds
Version 3:
o converted unlock/rele to an xfs_iput() call.
Version 2:
o change igrab logic to be more linear
o remove initial reclaimable inode check now that we are using
igrab() failure to find reclaimable inodes
o assert that igrab failure occurs only on reclaimable inodes
o clean up inode locking - only grab the iolock if we are doing
a SYNC_DELWRI call and we have a dirty inode.
SGI-PV: 987246
SGI-Modid: xfs-linux-melb:xfs-kern:32391a
Signed-off-by: David Chinner <david@fromorbit.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Peter Leckie <pleckie@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.c | 75 |
1 files changed, 18 insertions, 57 deletions
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index ee1648b179f7..fb5cca3df840 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -63,25 +63,16 @@ xfs_sync_inodes_ag( | |||
63 | int error = 0; | 63 | int error = 0; |
64 | int last_error = 0; | 64 | int last_error = 0; |
65 | int fflag = XFS_B_ASYNC; | 65 | int fflag = XFS_B_ASYNC; |
66 | int lock_flags = XFS_ILOCK_SHARED; | ||
67 | 66 | ||
68 | if (flags & SYNC_DELWRI) | 67 | if (flags & SYNC_DELWRI) |
69 | fflag = XFS_B_DELWRI; | 68 | fflag = XFS_B_DELWRI; |
70 | if (flags & SYNC_WAIT) | 69 | if (flags & SYNC_WAIT) |
71 | fflag = 0; /* synchronous overrides all */ | 70 | fflag = 0; /* synchronous overrides all */ |
72 | 71 | ||
73 | if (flags & SYNC_DELWRI) { | ||
74 | /* | ||
75 | * We need the I/O lock if we're going to call any of | ||
76 | * the flush/inval routines. | ||
77 | */ | ||
78 | lock_flags |= XFS_IOLOCK_SHARED; | ||
79 | } | ||
80 | |||
81 | do { | 72 | do { |
82 | struct inode *inode; | 73 | struct inode *inode; |
83 | boolean_t inode_refed; | ||
84 | xfs_inode_t *ip = NULL; | 74 | xfs_inode_t *ip = NULL; |
75 | int lock_flags = XFS_ILOCK_SHARED; | ||
85 | 76 | ||
86 | /* | 77 | /* |
87 | * use a gang lookup to find the next inode in the tree | 78 | * use a gang lookup to find the next inode in the tree |
@@ -109,22 +100,6 @@ xfs_sync_inodes_ag( | |||
109 | break; | 100 | break; |
110 | } | 101 | } |
111 | 102 | ||
112 | /* | ||
113 | * skip inodes in reclaim. Let xfs_syncsub do that for | ||
114 | * us so we don't need to worry. | ||
115 | */ | ||
116 | if (xfs_iflags_test(ip, (XFS_IRECLAIM|XFS_IRECLAIMABLE))) { | ||
117 | read_unlock(&pag->pag_ici_lock); | ||
118 | continue; | ||
119 | } | ||
120 | |||
121 | /* bad inodes are dealt with elsewhere */ | ||
122 | inode = VFS_I(ip); | ||
123 | if (is_bad_inode(inode)) { | ||
124 | read_unlock(&pag->pag_ici_lock); | ||
125 | continue; | ||
126 | } | ||
127 | |||
128 | /* nothing to sync during shutdown */ | 103 | /* nothing to sync during shutdown */ |
129 | if (XFS_FORCED_SHUTDOWN(mp)) { | 104 | if (XFS_FORCED_SHUTDOWN(mp)) { |
130 | read_unlock(&pag->pag_ici_lock); | 105 | read_unlock(&pag->pag_ici_lock); |
@@ -132,42 +107,34 @@ xfs_sync_inodes_ag( | |||
132 | } | 107 | } |
133 | 108 | ||
134 | /* | 109 | /* |
135 | * If we can't get a reference on the VFS_I, the inode must be | 110 | * If we can't get a reference on the inode, it must be |
136 | * in reclaim. If we can get the inode lock without blocking, | 111 | * in reclaim. Leave it for the reclaim code to flush. |
137 | * it is safe to flush the inode because we hold the tree lock | ||
138 | * and xfs_iextract will block right now. Hence if we lock the | ||
139 | * inode while holding the tree lock, xfs_ireclaim() is | ||
140 | * guaranteed to block on the inode lock we now hold and hence | ||
141 | * it is safe to reference the inode until we drop the inode | ||
142 | * locks completely. | ||
143 | */ | 112 | */ |
144 | inode_refed = B_FALSE; | 113 | inode = VFS_I(ip); |
145 | if (igrab(inode)) { | 114 | if (!igrab(inode)) { |
146 | read_unlock(&pag->pag_ici_lock); | ||
147 | xfs_ilock(ip, lock_flags); | ||
148 | inode_refed = B_TRUE; | ||
149 | } else { | ||
150 | if (!xfs_ilock_nowait(ip, lock_flags)) { | ||
151 | /* leave it to reclaim */ | ||
152 | read_unlock(&pag->pag_ici_lock); | ||
153 | continue; | ||
154 | } | ||
155 | read_unlock(&pag->pag_ici_lock); | 115 | read_unlock(&pag->pag_ici_lock); |
116 | continue; | ||
117 | } | ||
118 | read_unlock(&pag->pag_ici_lock); | ||
119 | |||
120 | /* bad inodes are dealt with elsewhere */ | ||
121 | if (is_bad_inode(inode)) { | ||
122 | IRELE(ip); | ||
123 | continue; | ||
156 | } | 124 | } |
157 | 125 | ||
158 | /* | 126 | /* |
159 | * If we have to flush data or wait for I/O completion | 127 | * If we have to flush data or wait for I/O completion |
160 | * we need to drop the ilock that we currently hold. | 128 | * we need to hold the iolock. |
161 | * If we need to drop the lock, insert a marker if we | ||
162 | * have not already done so. | ||
163 | */ | 129 | */ |
164 | if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) { | 130 | if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) { |
165 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 131 | xfs_ilock(ip, XFS_IOLOCK_SHARED); |
132 | lock_flags |= XFS_IOLOCK_SHARED; | ||
166 | error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE); | 133 | error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE); |
167 | if (flags & SYNC_IOWAIT) | 134 | if (flags & SYNC_IOWAIT) |
168 | vn_iowait(ip); | 135 | vn_iowait(ip); |
169 | xfs_ilock(ip, XFS_ILOCK_SHARED); | ||
170 | } | 136 | } |
137 | xfs_ilock(ip, XFS_ILOCK_SHARED); | ||
171 | 138 | ||
172 | if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) { | 139 | if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) { |
173 | if (flags & SYNC_WAIT) { | 140 | if (flags & SYNC_WAIT) { |
@@ -183,13 +150,7 @@ xfs_sync_inodes_ag( | |||
183 | xfs_ifunlock(ip); | 150 | xfs_ifunlock(ip); |
184 | } | 151 | } |
185 | } | 152 | } |
186 | 153 | xfs_iput(ip, lock_flags); | |
187 | if (lock_flags) | ||
188 | xfs_iunlock(ip, lock_flags); | ||
189 | |||
190 | if (inode_refed) { | ||
191 | IRELE(ip); | ||
192 | } | ||
193 | 154 | ||
194 | if (error) | 155 | if (error) |
195 | last_error = error; | 156 | last_error = error; |