diff options
author | Christoph Hellwig <hch@infradead.org> | 2012-04-23 01:58:34 -0400 |
---|---|---|
committer | Ben Myers <bpm@sgi.com> | 2012-05-14 17:20:27 -0400 |
commit | 211e4d434bd737be38aabad0247ce3da9964370e (patch) | |
tree | 6fb654a566507d302ff5672c2e66fb775ca3c6cd /fs/xfs/xfs_sync.c | |
parent | 1c30462542bac8abffb4823638b6b1659c1cfcf5 (diff) |
xfs: implement freezing by emptying the AIL
Now that we write back all metadata either synchronously or through
the AIL we can simply implement metadata freezing in terms of
emptying the AIL.
The implementation for this is fairly simple and straightforward:
A new routine is added that asks the xfsaild to push the AIL to the
end and waits for it to complete and send a wakeup. The routine will
then loop if the AIL is not actually empty, and continue to do so
until the AIL is completely empty.
We keep an inode reclaim pass in the freeze process to avoid having
memory pressure have to reclaim inodes that require dirtying the
filesystem to be reclaimed after the freeze has completed. This
means we can also treat unmount in the exact same way as freeze.
As an upside we can now remove the radix tree based inode writeback
and xfs_unmountfs_writesb.
[ Dave Chinner:
- Cleaned up commit message.
- Added inode reclaim passes back into freeze.
- Cleaned up wakeup mechanism to avoid the use of a new
sleep counter variable. ]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_sync.c')
-rw-r--r-- | fs/xfs/xfs_sync.c | 96 |
1 files changed, 13 insertions, 83 deletions
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c index 7648776e0a9e..85d03e6a2677 100644 --- a/fs/xfs/xfs_sync.c +++ b/fs/xfs/xfs_sync.c | |||
@@ -241,45 +241,6 @@ xfs_sync_inode_data( | |||
241 | return error; | 241 | return error; |
242 | } | 242 | } |
243 | 243 | ||
244 | STATIC int | ||
245 | xfs_sync_inode_attr( | ||
246 | struct xfs_inode *ip, | ||
247 | struct xfs_perag *pag, | ||
248 | int flags) | ||
249 | { | ||
250 | int error = 0; | ||
251 | |||
252 | xfs_ilock(ip, XFS_ILOCK_SHARED); | ||
253 | if (xfs_inode_clean(ip)) | ||
254 | goto out_unlock; | ||
255 | if (!xfs_iflock_nowait(ip)) { | ||
256 | if (!(flags & SYNC_WAIT)) | ||
257 | goto out_unlock; | ||
258 | xfs_iflock(ip); | ||
259 | } | ||
260 | |||
261 | if (xfs_inode_clean(ip)) { | ||
262 | xfs_ifunlock(ip); | ||
263 | goto out_unlock; | ||
264 | } | ||
265 | |||
266 | error = xfs_iflush(ip, flags); | ||
267 | |||
268 | /* | ||
269 | * We don't want to try again on non-blocking flushes that can't run | ||
270 | * again immediately. If an inode really must be written, then that's | ||
271 | * what the SYNC_WAIT flag is for. | ||
272 | */ | ||
273 | if (error == EAGAIN) { | ||
274 | ASSERT(!(flags & SYNC_WAIT)); | ||
275 | error = 0; | ||
276 | } | ||
277 | |||
278 | out_unlock: | ||
279 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
280 | return error; | ||
281 | } | ||
282 | |||
283 | /* | 244 | /* |
284 | * Write out pagecache data for the whole filesystem. | 245 | * Write out pagecache data for the whole filesystem. |
285 | */ | 246 | */ |
@@ -300,19 +261,6 @@ xfs_sync_data( | |||
300 | return 0; | 261 | return 0; |
301 | } | 262 | } |
302 | 263 | ||
303 | /* | ||
304 | * Write out inode metadata (attributes) for the whole filesystem. | ||
305 | */ | ||
306 | STATIC int | ||
307 | xfs_sync_attr( | ||
308 | struct xfs_mount *mp, | ||
309 | int flags) | ||
310 | { | ||
311 | ASSERT((flags & ~SYNC_WAIT) == 0); | ||
312 | |||
313 | return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags); | ||
314 | } | ||
315 | |||
316 | STATIC int | 264 | STATIC int |
317 | xfs_sync_fsdata( | 265 | xfs_sync_fsdata( |
318 | struct xfs_mount *mp) | 266 | struct xfs_mount *mp) |
@@ -350,7 +298,7 @@ xfs_sync_fsdata( | |||
350 | * First stage of freeze - no writers will make progress now we are here, | 298 | * First stage of freeze - no writers will make progress now we are here, |
351 | * so we flush delwri and delalloc buffers here, then wait for all I/O to | 299 | * so we flush delwri and delalloc buffers here, then wait for all I/O to |
352 | * complete. Data is frozen at that point. Metadata is not frozen, | 300 | * complete. Data is frozen at that point. Metadata is not frozen, |
353 | * transactions can still occur here so don't bother flushing the buftarg | 301 | * transactions can still occur here so don't bother emptying the AIL |
354 | * because it'll just get dirty again. | 302 | * because it'll just get dirty again. |
355 | */ | 303 | */ |
356 | int | 304 | int |
@@ -379,33 +327,6 @@ xfs_quiesce_data( | |||
379 | return error ? error : error2; | 327 | return error ? error : error2; |
380 | } | 328 | } |
381 | 329 | ||
382 | STATIC void | ||
383 | xfs_quiesce_fs( | ||
384 | struct xfs_mount *mp) | ||
385 | { | ||
386 | int count = 0, pincount; | ||
387 | |||
388 | xfs_reclaim_inodes(mp, 0); | ||
389 | xfs_flush_buftarg(mp->m_ddev_targp, 0); | ||
390 | |||
391 | /* | ||
392 | * This loop must run at least twice. The first instance of the loop | ||
393 | * will flush most meta data but that will generate more meta data | ||
394 | * (typically directory updates). Which then must be flushed and | ||
395 | * logged before we can write the unmount record. We also so sync | ||
396 | * reclaim of inodes to catch any that the above delwri flush skipped. | ||
397 | */ | ||
398 | do { | ||
399 | xfs_reclaim_inodes(mp, SYNC_WAIT); | ||
400 | xfs_sync_attr(mp, SYNC_WAIT); | ||
401 | pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); | ||
402 | if (!pincount) { | ||
403 | delay(50); | ||
404 | count++; | ||
405 | } | ||
406 | } while (count < 2); | ||
407 | } | ||
408 | |||
409 | /* | 330 | /* |
410 | * Second stage of a quiesce. The data is already synced, now we have to take | 331 | * Second stage of a quiesce. The data is already synced, now we have to take |
411 | * care of the metadata. New transactions are already blocked, so we need to | 332 | * care of the metadata. New transactions are already blocked, so we need to |
@@ -421,8 +342,12 @@ xfs_quiesce_attr( | |||
421 | while (atomic_read(&mp->m_active_trans) > 0) | 342 | while (atomic_read(&mp->m_active_trans) > 0) |
422 | delay(100); | 343 | delay(100); |
423 | 344 | ||
424 | /* flush inodes and push all remaining buffers out to disk */ | 345 | /* reclaim inodes to do any IO before the freeze completes */ |
425 | xfs_quiesce_fs(mp); | 346 | xfs_reclaim_inodes(mp, 0); |
347 | xfs_reclaim_inodes(mp, SYNC_WAIT); | ||
348 | |||
349 | /* flush all pending changes from the AIL */ | ||
350 | xfs_ail_push_all_sync(mp->m_ail); | ||
426 | 351 | ||
427 | /* | 352 | /* |
428 | * Just warn here till VFS can correctly support | 353 | * Just warn here till VFS can correctly support |
@@ -436,7 +361,12 @@ xfs_quiesce_attr( | |||
436 | xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " | 361 | xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " |
437 | "Frozen image may not be consistent."); | 362 | "Frozen image may not be consistent."); |
438 | xfs_log_unmount_write(mp); | 363 | xfs_log_unmount_write(mp); |
439 | xfs_unmountfs_writesb(mp); | 364 | |
365 | /* | ||
366 | * At this point we might have modified the superblock again and thus | ||
367 | * added an item to the AIL, thus flush it again. | ||
368 | */ | ||
369 | xfs_ail_push_all_sync(mp->m_ail); | ||
440 | } | 370 | } |
441 | 371 | ||
442 | static void | 372 | static void |