path: root/fs
author    David Chinner <david@fromorbit.com>	2008-10-30 02:07:29 -0400
committer Lachlan McIlroy <lachlan@sgi.com>	2008-10-30 02:07:29 -0400
commit    683a897080a053733778b36398186cb1b22c377f (patch)
tree      be084696bf3925a0bbeb37edf25e05168891732b /fs
parent    2f8a3ce1c20f20e6494cdb77fed76bc474ca3ca5 (diff)
[XFS] Use the inode tree for finding dirty inodes
Update xfs_sync_inodes to walk the inode radix tree cache to find
dirty inodes. This removes a huge bunch of nasty, messy code for
traversing the mount inode list safely and removes another user of
the mount inode list.

Version 3
o rediff against new linux-2.6/xfs_sync.c code

Version 2
o add comment explaining use of gang lookups for a single inode
o use IRELE, not VN_RELE
o move check for ag initialisation to caller.

SGI-PV: 988139
SGI-Modid: xfs-linux-melb:xfs-kern:32290a

Signed-off-by: David Chinner <david@fromorbit.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
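[Editor's note] The heart of the change is the gang-lookup walk: the per-AG inode radix tree is sparse, and a gang lookup for a single object returns the next populated slot at or after a given index, so advancing the index past each result visits every cached inode without scanning empty slots. The fragment below is a minimal sketch of that pattern, not code from the patch; the tree and object names (my_tree, struct my_obj, its index field) are illustrative stand-ins, and the rwlock the patch holds around each lookup (pag->pag_ici_lock) is elided:

	#include <linux/radix-tree.h>

	/* Illustrative object type; real code stores xfs_inode_t. */
	struct my_obj {
		unsigned long	index;	/* key the object was inserted under */
		/* ... payload ... */
	};

	static void walk_tree(struct radix_tree_root *my_tree)
	{
		struct my_obj	*obj;
		unsigned long	first_index = 0;
		int		nr_found;

		do {
			/* find the next object at or after first_index */
			nr_found = radix_tree_gang_lookup(my_tree,
					(void **)&obj, first_index, 1);
			if (!nr_found)
				break;		/* no more populated slots */

			/* resume the next lookup after this object */
			first_index = obj->index + 1;

			/* ... process obj ... */
		} while (nr_found);
	}

In the patch itself the index is derived with XFS_INO_TO_AGINO(mp, ip->i_ino + 1), which is what guarantees forward progress through the AG.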
Diffstat (limited to 'fs')
-rw-r--r--	fs/xfs/linux-2.6/xfs_sync.c | 361
1 file changed, 101 insertions(+), 260 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index cd82ba523dc4..53d85ecb1d50 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -121,356 +121,197 @@ xfs_sync(
 }
 
 /*
- * xfs sync routine for internal use
- *
- * This routine supports all of the flags defined for the generic vfs_sync
- * interface as explained above under xfs_sync.
- *
+ * Sync all the inodes in the given AG according to the
+ * direction given by the flags.
  */
-int
-xfs_sync_inodes(
+STATIC int
+xfs_sync_inodes_ag(
 	xfs_mount_t	*mp,
+	int		ag,
 	int		flags,
 	int		*bypassed)
 {
 	xfs_inode_t	*ip = NULL;
 	struct inode	*vp = NULL;
-	int		error;
-	int		last_error;
-	uint64_t	fflag;
-	uint		lock_flags;
-	uint		base_lock_flags;
-	boolean_t	mount_locked;
-	boolean_t	vnode_refed;
-	int		preempt;
-	xfs_iptr_t	*ipointer;
-#ifdef DEBUG
-	boolean_t	ipointer_in = B_FALSE;
-
-#define IPOINTER_SET	ipointer_in = B_TRUE
-#define IPOINTER_CLR	ipointer_in = B_FALSE
-#else
-#define IPOINTER_SET
-#define IPOINTER_CLR
-#endif
-
-
-/* Insert a marker record into the inode list after inode ip. The list
- * must be locked when this is called. After the call the list will no
- * longer be locked.
- */
-#define IPOINTER_INSERT(ip, mp)	{ \
-		ASSERT(ipointer_in == B_FALSE); \
-		ipointer->ip_mnext = ip->i_mnext; \
-		ipointer->ip_mprev = ip; \
-		ip->i_mnext = (xfs_inode_t *)ipointer; \
-		ipointer->ip_mnext->i_mprev = (xfs_inode_t *)ipointer; \
-		preempt = 0; \
-		XFS_MOUNT_IUNLOCK(mp); \
-		mount_locked = B_FALSE; \
-		IPOINTER_SET; \
-	}
-
-/* Remove the marker from the inode list. If the marker was the only item
- * in the list then there are no remaining inodes and we should zero out
- * the whole list. If we are the current head of the list then move the head
- * past us.
- */
-#define IPOINTER_REMOVE(ip, mp)	{ \
-		ASSERT(ipointer_in == B_TRUE); \
-		if (ipointer->ip_mnext != (xfs_inode_t *)ipointer) { \
-			ip = ipointer->ip_mnext; \
-			ip->i_mprev = ipointer->ip_mprev; \
-			ipointer->ip_mprev->i_mnext = ip; \
-			if (mp->m_inodes == (xfs_inode_t *)ipointer) { \
-				mp->m_inodes = ip; \
-			} \
-		} else { \
-			ASSERT(mp->m_inodes == (xfs_inode_t *)ipointer); \
-			mp->m_inodes = NULL; \
-			ip = NULL; \
-		} \
-		IPOINTER_CLR; \
-	}
-
-#define XFS_PREEMPT_MASK	0x7f
-
-	ASSERT(!(flags & SYNC_BDFLUSH));
-
-	if (bypassed)
-		*bypassed = 0;
-	if (mp->m_flags & XFS_MOUNT_RDONLY)
-		return 0;
-	error = 0;
-	last_error = 0;
-	preempt = 0;
-
-	/* Allocate a reference marker */
-	ipointer = (xfs_iptr_t *)kmem_zalloc(sizeof(xfs_iptr_t), KM_SLEEP);
+	xfs_perag_t	*pag = &mp->m_perag[ag];
+	boolean_t	vnode_refed = B_FALSE;
+	int		nr_found;
+	int		first_index = 0;
+	int		error = 0;
+	int		last_error = 0;
+	int		fflag = XFS_B_ASYNC;
+	int		lock_flags = XFS_ILOCK_SHARED;
 
-	fflag = XFS_B_ASYNC;		/* default is don't wait */
 	if (flags & SYNC_DELWRI)
 		fflag = XFS_B_DELWRI;
 	if (flags & SYNC_WAIT)
 		fflag = 0;		/* synchronous overrides all */
 
-	base_lock_flags = XFS_ILOCK_SHARED;
 	if (flags & (SYNC_DELWRI | SYNC_CLOSE)) {
 		/*
 		 * We need the I/O lock if we're going to call any of
 		 * the flush/inval routines.
 		 */
-		base_lock_flags |= XFS_IOLOCK_SHARED;
+		lock_flags |= XFS_IOLOCK_SHARED;
 	}
 
-	XFS_MOUNT_ILOCK(mp);
-
-	ip = mp->m_inodes;
-
-	mount_locked = B_TRUE;
-	vnode_refed  = B_FALSE;
-
-	IPOINTER_CLR;
-
 	do {
-		ASSERT(ipointer_in == B_FALSE);
-		ASSERT(vnode_refed == B_FALSE);
-
-		lock_flags = base_lock_flags;
-
 		/*
-		 * There were no inodes in the list, just break out
-		 * of the loop.
+		 * use a gang lookup to find the next inode in the tree
+		 * as the tree is sparse and a gang lookup walks to find
+		 * the number of objects requested.
 		 */
-		if (ip == NULL) {
-			break;
-		}
-
-		/*
-		 * We found another sync thread marker - skip it
-		 */
-		if (ip->i_mount == NULL) {
-			ip = ip->i_mnext;
-			continue;
+		read_lock(&pag->pag_ici_lock);
+		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+				(void**)&ip, first_index, 1);
+
+		if (!nr_found) {
+			read_unlock(&pag->pag_ici_lock);
+			break;
 		}
 
-		vp = VFS_I(ip);
+		/* update the index for the next lookup */
+		first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
 
 		/*
-		 * If the vnode is gone then this is being torn down,
-		 * call reclaim if it is flushed, else let regular flush
-		 * code deal with it later in the loop.
+		 * skip inodes in reclaim. Let xfs_syncsub do that for
+		 * us so we don't need to worry.
 		 */
-
-		if (vp == NULL) {
-			/* Skip ones already in reclaim */
-			if (ip->i_flags & XFS_IRECLAIM) {
-				ip = ip->i_mnext;
-				continue;
-			}
-			if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) {
-				ip = ip->i_mnext;
-			} else if ((xfs_ipincount(ip) == 0) &&
-				    xfs_iflock_nowait(ip)) {
-				IPOINTER_INSERT(ip, mp);
-
-				xfs_finish_reclaim(ip, 1,
-						XFS_IFLUSH_DELWRI_ELSE_ASYNC);
-
-				XFS_MOUNT_ILOCK(mp);
-				mount_locked = B_TRUE;
-				IPOINTER_REMOVE(ip, mp);
-			} else {
-				xfs_iunlock(ip, XFS_ILOCK_EXCL);
-				ip = ip->i_mnext;
-			}
+		vp = VFS_I(ip);
+		if (!vp) {
+			read_unlock(&pag->pag_ici_lock);
 			continue;
 		}
 
+		/* bad inodes are dealt with elsewhere */
 		if (VN_BAD(vp)) {
-			ip = ip->i_mnext;
+			read_unlock(&pag->pag_ici_lock);
 			continue;
 		}
 
+		/* nothing to sync during shutdown */
 		if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) {
-			XFS_MOUNT_IUNLOCK(mp);
-			kmem_free(ipointer);
+			read_unlock(&pag->pag_ici_lock);
 			return 0;
 		}
 
 		/*
-		 * Try to lock without sleeping. We're out of order with
-		 * the inode list lock here, so if we fail we need to drop
-		 * the mount lock and try again. If we're called from
-		 * bdflush() here, then don't bother.
-		 *
-		 * The inode lock here actually coordinates with the
-		 * almost spurious inode lock in xfs_ireclaim() to prevent
-		 * the vnode we handle here without a reference from
-		 * being freed while we reference it. If we lock the inode
-		 * while it's on the mount list here, then the spurious inode
-		 * lock in xfs_ireclaim() after the inode is pulled from
-		 * the mount list will sleep until we release it here.
-		 * This keeps the vnode from being freed while we reference
-		 * it.
+		 * The inode lock here actually coordinates with the almost
+		 * spurious inode lock in xfs_ireclaim() to prevent the vnode
+		 * we handle here without a reference from being freed while we
+		 * reference it. If we lock the inode while it's on the mount
+		 * list here, then the spurious inode lock in xfs_ireclaim()
+		 * after the inode is pulled from the mount list will sleep
+		 * until we release it here. This keeps the vnode from being
+		 * freed while we reference it.
 		 */
 		if (xfs_ilock_nowait(ip, lock_flags) == 0) {
-			if (vp == NULL) {
-				ip = ip->i_mnext;
-				continue;
-			}
-
 			vp = vn_grab(vp);
-			if (vp == NULL) {
-				ip = ip->i_mnext;
+			read_unlock(&pag->pag_ici_lock);
+			if (!vp)
 				continue;
-			}
-
-			IPOINTER_INSERT(ip, mp);
 			xfs_ilock(ip, lock_flags);
 
 			ASSERT(vp == VFS_I(ip));
 			ASSERT(ip->i_mount == mp);
 
 			vnode_refed = B_TRUE;
+		} else {
+			/* safe to unlock here as we have a reference */
+			read_unlock(&pag->pag_ici_lock);
 		}
-
-		/* From here on in the loop we may have a marker record
-		 * in the inode list.
-		 */
-
 		/*
 		 * If we have to flush data or wait for I/O completion
 		 * we need to drop the ilock that we currently hold.
 		 * If we need to drop the lock, insert a marker if we
 		 * have not already done so.
 		 */
-		if ((flags & (SYNC_CLOSE|SYNC_IOWAIT)) ||
-		    ((flags & SYNC_DELWRI) && VN_DIRTY(vp))) {
-			if (mount_locked) {
-				IPOINTER_INSERT(ip, mp);
-			}
+		if (flags & SYNC_CLOSE) {
 			xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-			if (flags & SYNC_CLOSE) {
-				/* Shutdown case. Flush and invalidate. */
-				if (XFS_FORCED_SHUTDOWN(mp))
-					xfs_tosspages(ip, 0, -1,
-							FI_REMAPF);
-				else
-					error = xfs_flushinval_pages(ip,
-							0, -1, FI_REMAPF);
-			} else if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) {
-				error = xfs_flush_pages(ip, 0,
-						-1, fflag, FI_NONE);
-			}
-
-			/*
-			 * When freezing, we need to wait ensure all I/O (including direct
-			 * I/O) is complete to ensure no further data modification can take
-			 * place after this point
-			 */
+			if (XFS_FORCED_SHUTDOWN(mp))
+				xfs_tosspages(ip, 0, -1, FI_REMAPF);
+			else
+				error = xfs_flushinval_pages(ip, 0, -1,
+						FI_REMAPF);
+			/* wait for I/O on freeze */
 			if (flags & SYNC_IOWAIT)
 				vn_iowait(ip);
 
 			xfs_ilock(ip, XFS_ILOCK_SHARED);
 		}
 
-		if ((flags & SYNC_ATTR) &&
-		    (ip->i_update_core ||
-		     (ip->i_itemp && ip->i_itemp->ili_format.ilf_fields))) {
-			if (mount_locked)
-				IPOINTER_INSERT(ip, mp);
+		if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) {
+			xfs_iunlock(ip, XFS_ILOCK_SHARED);
+			error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE);
+			if (flags & SYNC_IOWAIT)
+				vn_iowait(ip);
+			xfs_ilock(ip, XFS_ILOCK_SHARED);
+		}
 
+		if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) {
 			if (flags & SYNC_WAIT) {
 				xfs_iflock(ip);
-				error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
-
-			/*
-			 * If we can't acquire the flush lock, then the inode
-			 * is already being flushed so don't bother waiting.
-			 *
-			 * If we can lock it then do a delwri flush so we can
-			 * combine multiple inode flushes in each disk write.
-			 */
+				if (!xfs_inode_clean(ip))
+					error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
+				else
+					xfs_ifunlock(ip);
 			} else if (xfs_iflock_nowait(ip)) {
-				error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
+				if (!xfs_inode_clean(ip))
+					error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
+				else
+					xfs_ifunlock(ip);
 			} else if (bypassed) {
 				(*bypassed)++;
 			}
 		}
 
-		if (lock_flags != 0) {
+		if (lock_flags)
 			xfs_iunlock(ip, lock_flags);
-		}
 
 		if (vnode_refed) {
-			/*
-			 * If we had to take a reference on the vnode
-			 * above, then wait until after we've unlocked
-			 * the inode to release the reference. This is
-			 * because we can be already holding the inode
-			 * lock when IRELE() calls xfs_inactive().
-			 *
-			 * Make sure to drop the mount lock before calling
-			 * IRELE() so that we don't trip over ourselves if
-			 * we have to go for the mount lock again in the
-			 * inactive code.
-			 */
-			if (mount_locked) {
-				IPOINTER_INSERT(ip, mp);
-			}
-
 			IRELE(ip);
-
 			vnode_refed = B_FALSE;
 		}
 
-		if (error) {
+		if (error)
 			last_error = error;
-		}
-
 		/*
 		 * bail out if the filesystem is corrupted.
 		 */
-		if (error == EFSCORRUPTED) {
-			if (!mount_locked) {
-				XFS_MOUNT_ILOCK(mp);
-				IPOINTER_REMOVE(ip, mp);
-			}
-			XFS_MOUNT_IUNLOCK(mp);
-			ASSERT(ipointer_in == B_FALSE);
-			kmem_free(ipointer);
+		if (error == EFSCORRUPTED)
 			return XFS_ERROR(error);
-		}
-
-		/* Let other threads have a chance at the mount lock
-		 * if we have looped many times without dropping the
-		 * lock.
-		 */
-		if ((++preempt & XFS_PREEMPT_MASK) == 0) {
-			if (mount_locked) {
-				IPOINTER_INSERT(ip, mp);
-			}
-		}
-
-		if (mount_locked == B_FALSE) {
-			XFS_MOUNT_ILOCK(mp);
-			mount_locked = B_TRUE;
-			IPOINTER_REMOVE(ip, mp);
-			continue;
-		}
 
-		ASSERT(ipointer_in == B_FALSE);
-		ip = ip->i_mnext;
+	} while (nr_found);
 
-	} while (ip != mp->m_inodes);
+	return last_error;
+}
 
-	XFS_MOUNT_IUNLOCK(mp);
+int
+xfs_sync_inodes(
+	xfs_mount_t	*mp,
+	int		flags,
+	int		*bypassed)
+{
+	int		error;
+	int		last_error;
+	int		i;
 
-	ASSERT(ipointer_in == B_FALSE);
+	if (bypassed)
+		*bypassed = 0;
+	if (mp->m_flags & XFS_MOUNT_RDONLY)
+		return 0;
+	error = 0;
+	last_error = 0;
 
-	kmem_free(ipointer);
+	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+		if (!mp->m_perag[i].pag_ici_init)
+			continue;
+		error = xfs_sync_inodes_ag(mp, i, flags, bypassed);
+		if (error)
+			last_error = error;
+		if (error == EFSCORRUPTED)
+			break;
+	}
 	return XFS_ERROR(last_error);
 }
 