aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Christoph Hellwig <hch@infradead.org> 2012-04-23 01:58:34 -0400
committer Ben Myers <bpm@sgi.com> 2012-05-14 17:20:27 -0400
commit 211e4d434bd737be38aabad0247ce3da9964370e (patch)
tree 6fb654a566507d302ff5672c2e66fb775ca3c6cd /fs
parent 1c30462542bac8abffb4823638b6b1659c1cfcf5 (diff)
xfs: implement freezing by emptying the AIL
Now that we write back all metadata either synchronously or through the AIL we can simply implement metadata freezing in terms of emptying the AIL. The implementation for this is fairly simple and straightforward: A new routine is added that asks the xfsaild to push the AIL to the end and waits for it to complete and send a wakeup. The routine will then loop if the AIL is not actually empty, and continue to do so until the AIL is completely empty. We keep an inode reclaim pass in the freeze process to avoid having memory pressure have to reclaim inodes that require dirtying the filesystem to be reclaimed after the freeze has completed. This means we can also treat unmount in the exact same way as freeze. As an upside we can now remove the radix tree based inode writeback and xfs_unmountfs_writesb. [ Dave Chinner: - Cleaned up commit message. - Added inode reclaim passes back into freeze. - Cleaned up wakeup mechanism to avoid the use of a new sleep counter variable. ] Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Mark Tinguely <tinguely@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/xfs/xfs_mount.c 56
-rw-r--r-- fs/xfs/xfs_mount.h 1
-rw-r--r-- fs/xfs/xfs_sync.c 96
-rw-r--r-- fs/xfs/xfs_trans_ail.c 36
-rw-r--r-- fs/xfs/xfs_trans_priv.h 2
5 files changed, 56 insertions, 135 deletions
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 1ffead4b2296..385a3b11d3dd 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -22,6 +22,7 @@
22#include "xfs_log.h" 22#include "xfs_log.h"
23#include "xfs_inum.h" 23#include "xfs_inum.h"
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_trans_priv.h"
25#include "xfs_sb.h" 26#include "xfs_sb.h"
26#include "xfs_ag.h" 27#include "xfs_ag.h"
27#include "xfs_dir2.h" 28#include "xfs_dir2.h"
@@ -1475,15 +1476,15 @@ xfs_unmountfs(
1475 xfs_log_force(mp, XFS_LOG_SYNC); 1476 xfs_log_force(mp, XFS_LOG_SYNC);
1476 1477
1477 /* 1478 /*
1478 * Do a delwri reclaim pass first so that as many dirty inodes are 1479 * Flush all pending changes from the AIL.
1479 * queued up for IO as possible. Then flush the buffers before making 1480 */
1480 * a synchronous path to catch all the remaining inodes are reclaimed. 1481 xfs_ail_push_all_sync(mp->m_ail);
1481 * This makes the reclaim process as quick as possible by avoiding 1482
1482 * synchronous writeout and blocking on inodes already in the delwri 1483 /*
1483 * state as much as possible. 1484 * And reclaim all inodes. At this point there should be no dirty
1485 * inode, and none should be pinned or locked, but use synchronous
1486 * reclaim just to be sure.
1484 */ 1487 */
1485 xfs_reclaim_inodes(mp, 0);
1486 xfs_flush_buftarg(mp->m_ddev_targp, 1);
1487 xfs_reclaim_inodes(mp, SYNC_WAIT); 1488 xfs_reclaim_inodes(mp, SYNC_WAIT);
1488 1489
1489 xfs_qm_unmount(mp); 1490 xfs_qm_unmount(mp);
@@ -1519,15 +1520,12 @@ xfs_unmountfs(
1519 if (error) 1520 if (error)
1520 xfs_warn(mp, "Unable to update superblock counters. " 1521 xfs_warn(mp, "Unable to update superblock counters. "
1521 "Freespace may not be correct on next mount."); 1522 "Freespace may not be correct on next mount.");
1522 xfs_unmountfs_writesb(mp);
1523 1523
1524 /* 1524 /*
1525 * Make sure all buffers have been flushed and completed before 1525 * At this point we might have modified the superblock again and thus
1526 * unmounting the log. 1526 * added an item to the AIL, thus flush it again.
1527 */ 1527 */
1528 error = xfs_flush_buftarg(mp->m_ddev_targp, 1); 1528 xfs_ail_push_all_sync(mp->m_ail);
1529 if (error)
1530 xfs_warn(mp, "%d busy buffers during unmount.", error);
1531 xfs_wait_buftarg(mp->m_ddev_targp); 1529 xfs_wait_buftarg(mp->m_ddev_targp);
1532 1530
1533 xfs_log_unmount_write(mp); 1531 xfs_log_unmount_write(mp);
@@ -1588,36 +1586,6 @@ xfs_log_sbcount(xfs_mount_t *mp)
1588 return error; 1586 return error;
1589} 1587}
1590 1588
1591int
1592xfs_unmountfs_writesb(xfs_mount_t *mp)
1593{
1594 xfs_buf_t *sbp;
1595 int error = 0;
1596
1597 /*
1598 * skip superblock write if fs is read-only, or
1599 * if we are doing a forced umount.
1600 */
1601 if (!((mp->m_flags & XFS_MOUNT_RDONLY) ||
1602 XFS_FORCED_SHUTDOWN(mp))) {
1603
1604 sbp = xfs_getsb(mp, 0);
1605
1606 XFS_BUF_UNDONE(sbp);
1607 XFS_BUF_UNREAD(sbp);
1608 xfs_buf_delwri_dequeue(sbp);
1609 XFS_BUF_WRITE(sbp);
1610 XFS_BUF_UNASYNC(sbp);
1611 ASSERT(sbp->b_target == mp->m_ddev_targp);
1612 xfsbdstrat(mp, sbp);
1613 error = xfs_buf_iowait(sbp);
1614 if (error)
1615 xfs_buf_ioerror_alert(sbp, __func__);
1616 xfs_buf_relse(sbp);
1617 }
1618 return error;
1619}
1620
1621/* 1589/*
1622 * xfs_mod_sb() can be used to copy arbitrary changes to the 1590 * xfs_mod_sb() can be used to copy arbitrary changes to the
1623 * in-core superblock into the superblock buffer to be logged. 1591 * in-core superblock into the superblock buffer to be logged.
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 9eba73887829..19fd5eda92b8 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -378,7 +378,6 @@ extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
378extern int xfs_mountfs(xfs_mount_t *mp); 378extern int xfs_mountfs(xfs_mount_t *mp);
379 379
380extern void xfs_unmountfs(xfs_mount_t *); 380extern void xfs_unmountfs(xfs_mount_t *);
381extern int xfs_unmountfs_writesb(xfs_mount_t *);
382extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); 381extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
383extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, 382extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
384 uint, int); 383 uint, int);
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 7648776e0a9e..85d03e6a2677 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -241,45 +241,6 @@ xfs_sync_inode_data(
241 return error; 241 return error;
242} 242}
243 243
244STATIC int
245xfs_sync_inode_attr(
246 struct xfs_inode *ip,
247 struct xfs_perag *pag,
248 int flags)
249{
250 int error = 0;
251
252 xfs_ilock(ip, XFS_ILOCK_SHARED);
253 if (xfs_inode_clean(ip))
254 goto out_unlock;
255 if (!xfs_iflock_nowait(ip)) {
256 if (!(flags & SYNC_WAIT))
257 goto out_unlock;
258 xfs_iflock(ip);
259 }
260
261 if (xfs_inode_clean(ip)) {
262 xfs_ifunlock(ip);
263 goto out_unlock;
264 }
265
266 error = xfs_iflush(ip, flags);
267
268 /*
269 * We don't want to try again on non-blocking flushes that can't run
270 * again immediately. If an inode really must be written, then that's
271 * what the SYNC_WAIT flag is for.
272 */
273 if (error == EAGAIN) {
274 ASSERT(!(flags & SYNC_WAIT));
275 error = 0;
276 }
277
278 out_unlock:
279 xfs_iunlock(ip, XFS_ILOCK_SHARED);
280 return error;
281}
282
283/* 244/*
284 * Write out pagecache data for the whole filesystem. 245 * Write out pagecache data for the whole filesystem.
285 */ 246 */
@@ -300,19 +261,6 @@ xfs_sync_data(
300 return 0; 261 return 0;
301} 262}
302 263
303/*
304 * Write out inode metadata (attributes) for the whole filesystem.
305 */
306STATIC int
307xfs_sync_attr(
308 struct xfs_mount *mp,
309 int flags)
310{
311 ASSERT((flags & ~SYNC_WAIT) == 0);
312
313 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
314}
315
316STATIC int 264STATIC int
317xfs_sync_fsdata( 265xfs_sync_fsdata(
318 struct xfs_mount *mp) 266 struct xfs_mount *mp)
@@ -350,7 +298,7 @@ xfs_sync_fsdata(
350 * First stage of freeze - no writers will make progress now we are here, 298 * First stage of freeze - no writers will make progress now we are here,
351 * so we flush delwri and delalloc buffers here, then wait for all I/O to 299 * so we flush delwri and delalloc buffers here, then wait for all I/O to
352 * complete. Data is frozen at that point. Metadata is not frozen, 300 * complete. Data is frozen at that point. Metadata is not frozen,
353 * transactions can still occur here so don't bother flushing the buftarg 301 * transactions can still occur here so don't bother emptying the AIL
354 * because it'll just get dirty again. 302 * because it'll just get dirty again.
355 */ 303 */
356int 304int
@@ -379,33 +327,6 @@ xfs_quiesce_data(
379 return error ? error : error2; 327 return error ? error : error2;
380} 328}
381 329
382STATIC void
383xfs_quiesce_fs(
384 struct xfs_mount *mp)
385{
386 int count = 0, pincount;
387
388 xfs_reclaim_inodes(mp, 0);
389 xfs_flush_buftarg(mp->m_ddev_targp, 0);
390
391 /*
392 * This loop must run at least twice. The first instance of the loop
393 * will flush most meta data but that will generate more meta data
394 * (typically directory updates). Which then must be flushed and
395 * logged before we can write the unmount record. We also so sync
396 * reclaim of inodes to catch any that the above delwri flush skipped.
397 */
398 do {
399 xfs_reclaim_inodes(mp, SYNC_WAIT);
400 xfs_sync_attr(mp, SYNC_WAIT);
401 pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
402 if (!pincount) {
403 delay(50);
404 count++;
405 }
406 } while (count < 2);
407}
408
409/* 330/*
410 * Second stage of a quiesce. The data is already synced, now we have to take 331 * Second stage of a quiesce. The data is already synced, now we have to take
411 * care of the metadata. New transactions are already blocked, so we need to 332 * care of the metadata. New transactions are already blocked, so we need to
@@ -421,8 +342,12 @@ xfs_quiesce_attr(
421 while (atomic_read(&mp->m_active_trans) > 0) 342 while (atomic_read(&mp->m_active_trans) > 0)
422 delay(100); 343 delay(100);
423 344
424 /* flush inodes and push all remaining buffers out to disk */ 345 /* reclaim inodes to do any IO before the freeze completes */
425 xfs_quiesce_fs(mp); 346 xfs_reclaim_inodes(mp, 0);
347 xfs_reclaim_inodes(mp, SYNC_WAIT);
348
349 /* flush all pending changes from the AIL */
350 xfs_ail_push_all_sync(mp->m_ail);
426 351
427 /* 352 /*
428 * Just warn here till VFS can correctly support 353 * Just warn here till VFS can correctly support
@@ -436,7 +361,12 @@ xfs_quiesce_attr(
436 xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " 361 xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
437 "Frozen image may not be consistent."); 362 "Frozen image may not be consistent.");
438 xfs_log_unmount_write(mp); 363 xfs_log_unmount_write(mp);
439 xfs_unmountfs_writesb(mp); 364
365 /*
366 * At this point we might have modified the superblock again and thus
367 * added an item to the AIL, thus flush it again.
368 */
369 xfs_ail_push_all_sync(mp->m_ail);
440} 370}
441 371
442static void 372static void
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 77acc53f2f31..0425ca16738b 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -383,9 +383,8 @@ xfsaild_push(
383 spin_lock(&ailp->xa_lock); 383 spin_lock(&ailp->xa_lock);
384 } 384 }
385 385
386 target = ailp->xa_target;
387 lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn); 386 lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn);
388 if (!lip || XFS_FORCED_SHUTDOWN(mp)) { 387 if (!lip) {
389 /* 388 /*
390 * AIL is empty or our push has reached the end. 389 * AIL is empty or our push has reached the end.
391 */ 390 */
@@ -408,6 +407,7 @@ xfsaild_push(
408 * lots of contention on the AIL lists. 407 * lots of contention on the AIL lists.
409 */ 408 */
410 lsn = lip->li_lsn; 409 lsn = lip->li_lsn;
410 target = ailp->xa_target;
411 while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) { 411 while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) {
412 int lock_result; 412 int lock_result;
413 /* 413 /*
@@ -466,11 +466,6 @@ xfsaild_push(
466 } 466 }
467 467
468 spin_lock(&ailp->xa_lock); 468 spin_lock(&ailp->xa_lock);
469 /* should we bother continuing? */
470 if (XFS_FORCED_SHUTDOWN(mp))
471 break;
472 ASSERT(mp->m_log);
473
474 count++; 469 count++;
475 470
476 /* 471 /*
@@ -611,6 +606,30 @@ xfs_ail_push_all(
611} 606}
612 607
613/* 608/*
609 * Push out all items in the AIL immediately and wait until the AIL is empty.
610 */
611void
612xfs_ail_push_all_sync(
613 struct xfs_ail *ailp)
614{
615 struct xfs_log_item *lip;
616 DEFINE_WAIT(wait);
617
618 spin_lock(&ailp->xa_lock);
619 while ((lip = xfs_ail_max(ailp)) != NULL) {
620 prepare_to_wait(&ailp->xa_empty, &wait, TASK_UNINTERRUPTIBLE);
621 ailp->xa_target = lip->li_lsn;
622 wake_up_process(ailp->xa_task);
623 spin_unlock(&ailp->xa_lock);
624 schedule();
625 spin_lock(&ailp->xa_lock);
626 }
627 spin_unlock(&ailp->xa_lock);
628
629 finish_wait(&ailp->xa_empty, &wait);
630}
631
632/*
614 * xfs_trans_ail_update - bulk AIL insertion operation. 633 * xfs_trans_ail_update - bulk AIL insertion operation.
615 * 634 *
616 * @xfs_trans_ail_update takes an array of log items that all need to be 635 * @xfs_trans_ail_update takes an array of log items that all need to be
@@ -737,6 +756,8 @@ xfs_trans_ail_delete_bulk(
737 if (mlip_changed) { 756 if (mlip_changed) {
738 if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount)) 757 if (!XFS_FORCED_SHUTDOWN(ailp->xa_mount))
739 xlog_assign_tail_lsn_locked(ailp->xa_mount); 758 xlog_assign_tail_lsn_locked(ailp->xa_mount);
759 if (list_empty(&ailp->xa_ail))
760 wake_up_all(&ailp->xa_empty);
740 spin_unlock(&ailp->xa_lock); 761 spin_unlock(&ailp->xa_lock);
741 762
742 xfs_log_space_wake(ailp->xa_mount); 763 xfs_log_space_wake(ailp->xa_mount);
@@ -773,6 +794,7 @@ xfs_trans_ail_init(
773 INIT_LIST_HEAD(&ailp->xa_ail); 794 INIT_LIST_HEAD(&ailp->xa_ail);
774 INIT_LIST_HEAD(&ailp->xa_cursors); 795 INIT_LIST_HEAD(&ailp->xa_cursors);
775 spin_lock_init(&ailp->xa_lock); 796 spin_lock_init(&ailp->xa_lock);
797 init_waitqueue_head(&ailp->xa_empty);
776 798
777 ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s", 799 ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
778 ailp->xa_mount->m_fsname); 800 ailp->xa_mount->m_fsname);
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 46a1ebd4a7a2..218304a8cdc7 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -71,6 +71,7 @@ struct xfs_ail {
71 spinlock_t xa_lock; 71 spinlock_t xa_lock;
72 xfs_lsn_t xa_last_pushed_lsn; 72 xfs_lsn_t xa_last_pushed_lsn;
73 int xa_log_flush; 73 int xa_log_flush;
74 wait_queue_head_t xa_empty;
74}; 75};
75 76
76/* 77/*
@@ -102,6 +103,7 @@ xfs_trans_ail_delete(
102 103
103void xfs_ail_push(struct xfs_ail *, xfs_lsn_t); 104void xfs_ail_push(struct xfs_ail *, xfs_lsn_t);
104void xfs_ail_push_all(struct xfs_ail *); 105void xfs_ail_push_all(struct xfs_ail *);
106void xfs_ail_push_all_sync(struct xfs_ail *);
105struct xfs_log_item *xfs_ail_min(struct xfs_ail *ailp); 107struct xfs_log_item *xfs_ail_min(struct xfs_ail *ailp);
106xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp); 108xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp);
107 109