aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@infradead.org>2012-04-23 01:58:36 -0400
committerBen Myers <bpm@sgi.com>2012-05-14 17:20:28 -0400
commit4c46819a8097a75d3b378c5e56d2bcf47bb7408d (patch)
tree031f84bd94f044218d43ef3d11f90df0480513c6 /fs/xfs
parent8a48088f6439249019b5e17f6391e710656879d9 (diff)
xfs: do not write the buffer from xfs_iflush
Instead of writing the buffer directly from inside xfs_iflush return it to the caller and let the caller decide what to do with the buffer. Also remove the pincount check in xfs_iflush that all non-blocking callers already implement and the now unused flags parameter. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Mark Tinguely <tinguely@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/xfs_inode.c54
-rw-r--r--fs/xfs/xfs_inode.h2
-rw-r--r--fs/xfs/xfs_inode_item.c17
-rw-r--r--fs/xfs/xfs_sync.c29
4 files changed, 48 insertions, 54 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 00f9c2f34e1f..0fa987dea242 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2384,22 +2384,22 @@ cluster_corrupt_out:
2384} 2384}
2385 2385
2386/* 2386/*
2387 * xfs_iflush() will write a modified inode's changes out to the 2387 * Flush dirty inode metadata into the backing buffer.
2388 * inode's on disk home. The caller must have the inode lock held 2388 *
2389 * in at least shared mode and the inode flush completion must be 2389 * The caller must have the inode lock and the inode flush lock held. The
2390 * active as well. The inode lock will still be held upon return from 2390 * inode lock will still be held upon return to the caller, and the inode
2391 * the call and the caller is free to unlock it. 2391 * flush lock will be released after the inode has reached the disk.
2392 * The inode flush will be completed when the inode reaches the disk. 2392 *
2393 * The flags indicate how the inode's buffer should be written out. 2393 * The caller must write out the buffer returned in *bpp and release it.
2394 */ 2394 */
2395int 2395int
2396xfs_iflush( 2396xfs_iflush(
2397 xfs_inode_t *ip, 2397 struct xfs_inode *ip,
2398 uint flags) 2398 struct xfs_buf **bpp)
2399{ 2399{
2400 xfs_buf_t *bp; 2400 struct xfs_mount *mp = ip->i_mount;
2401 xfs_dinode_t *dip; 2401 struct xfs_buf *bp;
2402 xfs_mount_t *mp; 2402 struct xfs_dinode *dip;
2403 int error; 2403 int error;
2404 2404
2405 XFS_STATS_INC(xs_iflush_count); 2405 XFS_STATS_INC(xs_iflush_count);
@@ -2409,24 +2409,8 @@ xfs_iflush(
2409 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 2409 ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
2410 ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); 2410 ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
2411 2411
2412 mp = ip->i_mount; 2412 *bpp = NULL;
2413 2413
2414 /*
2415 * We can't flush the inode until it is unpinned, so wait for it if we
2416 * are allowed to block. We know no one new can pin it, because we are
2417 * holding the inode lock shared and you need to hold it exclusively to
2418 * pin the inode.
2419 *
2420 * If we are not allowed to block, force the log out asynchronously so
2421 * that when we come back the inode will be unpinned. If other inodes
2422 * in the same cluster are dirty, they will probably write the inode
2423 * out for us if they occur after the log force completes.
2424 */
2425 if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) {
2426 xfs_iunpin(ip);
2427 xfs_ifunlock(ip);
2428 return EAGAIN;
2429 }
2430 xfs_iunpin_wait(ip); 2414 xfs_iunpin_wait(ip);
2431 2415
2432 /* 2416 /*
@@ -2458,8 +2442,7 @@ xfs_iflush(
2458 /* 2442 /*
2459 * Get the buffer containing the on-disk inode. 2443 * Get the buffer containing the on-disk inode.
2460 */ 2444 */
2461 error = xfs_itobp(mp, NULL, ip, &dip, &bp, 2445 error = xfs_itobp(mp, NULL, ip, &dip, &bp, XBF_TRYLOCK);
2462 (flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK);
2463 if (error || !bp) { 2446 if (error || !bp) {
2464 xfs_ifunlock(ip); 2447 xfs_ifunlock(ip);
2465 return error; 2448 return error;
@@ -2487,13 +2470,8 @@ xfs_iflush(
2487 if (error) 2470 if (error)
2488 goto cluster_corrupt_out; 2471 goto cluster_corrupt_out;
2489 2472
2490 if (flags & SYNC_WAIT) 2473 *bpp = bp;
2491 error = xfs_bwrite(bp); 2474 return 0;
2492 else
2493 xfs_buf_delwri_queue(bp);
2494
2495 xfs_buf_relse(bp);
2496 return error;
2497 2475
2498corrupt_out: 2476corrupt_out:
2499 xfs_buf_relse(bp); 2477 xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 7fee3387e1c8..a2fa79ae410f 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -529,7 +529,7 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
529 529
530void xfs_iext_realloc(xfs_inode_t *, int, int); 530void xfs_iext_realloc(xfs_inode_t *, int, int);
531void xfs_iunpin_wait(xfs_inode_t *); 531void xfs_iunpin_wait(xfs_inode_t *);
532int xfs_iflush(xfs_inode_t *, uint); 532int xfs_iflush(struct xfs_inode *, struct xfs_buf **);
533void xfs_promote_inode(struct xfs_inode *); 533void xfs_promote_inode(struct xfs_inode *);
534void xfs_lock_inodes(xfs_inode_t **, int, uint); 534void xfs_lock_inodes(xfs_inode_t **, int, uint);
535void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); 535void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 05d924efceaf..d3601ab75dd3 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -506,6 +506,15 @@ xfs_inode_item_trylock(
506 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) 506 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
507 return XFS_ITEM_LOCKED; 507 return XFS_ITEM_LOCKED;
508 508
509 /*
510 * Re-check the pincount now that we stabilized the value by
511 * taking the ilock.
512 */
513 if (xfs_ipincount(ip) > 0) {
514 xfs_iunlock(ip, XFS_ILOCK_SHARED);
515 return XFS_ITEM_PINNED;
516 }
517
509 if (!xfs_iflock_nowait(ip)) { 518 if (!xfs_iflock_nowait(ip)) {
510 /* 519 /*
511 * inode has already been flushed to the backing buffer, 520 * inode has already been flushed to the backing buffer,
@@ -666,6 +675,8 @@ xfs_inode_item_push(
666{ 675{
667 struct xfs_inode_log_item *iip = INODE_ITEM(lip); 676 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
668 struct xfs_inode *ip = iip->ili_inode; 677 struct xfs_inode *ip = iip->ili_inode;
678 struct xfs_buf *bp = NULL;
679 int error;
669 680
670 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); 681 ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
671 ASSERT(xfs_isiflocked(ip)); 682 ASSERT(xfs_isiflocked(ip));
@@ -689,7 +700,11 @@ xfs_inode_item_push(
689 * will pull the inode from the AIL, mark it clean and unlock the flush 700 * will pull the inode from the AIL, mark it clean and unlock the flush
690 * lock. 701 * lock.
691 */ 702 */
692 (void) xfs_iflush(ip, SYNC_TRYLOCK); 703 error = xfs_iflush(ip, &bp);
704 if (!error) {
705 xfs_buf_delwri_queue(bp);
706 xfs_buf_relse(bp);
707 }
693 xfs_iunlock(ip, XFS_ILOCK_SHARED); 708 xfs_iunlock(ip, XFS_ILOCK_SHARED);
694} 709}
695 710
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 7b2bccc4d67b..468c3c0a4f9f 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -648,10 +648,6 @@ xfs_reclaim_inode_grab(
648 * (*) dgc: I don't think the clean, pinned state is possible but it gets 648 * (*) dgc: I don't think the clean, pinned state is possible but it gets
649 * handled anyway given the order of checks implemented. 649 * handled anyway given the order of checks implemented.
650 * 650 *
651 * As can be seen from the table, the return value of xfs_iflush() is not
652 * sufficient to correctly decide the reclaim action here. The checks in
653 * xfs_iflush() might look like duplicates, but they are not.
654 *
655 * Also, because we get the flush lock first, we know that any inode that has 651 * Also, because we get the flush lock first, we know that any inode that has
656 * been flushed delwri has had the flush completed by the time we check that 652 * been flushed delwri has had the flush completed by the time we check that
657 * the inode is clean. 653 * the inode is clean.
@@ -679,7 +675,8 @@ xfs_reclaim_inode(
679 struct xfs_perag *pag, 675 struct xfs_perag *pag,
680 int sync_mode) 676 int sync_mode)
681{ 677{
682 int error; 678 struct xfs_buf *bp = NULL;
679 int error;
683 680
684restart: 681restart:
685 error = 0; 682 error = 0;
@@ -728,29 +725,33 @@ restart:
728 /* 725 /*
729 * Now we have an inode that needs flushing. 726 * Now we have an inode that needs flushing.
730 * 727 *
731 * We do a nonblocking flush here even if we are doing a SYNC_WAIT 728 * Note that xfs_iflush will never block on the inode buffer lock, as
732 * reclaim as we can deadlock with inode cluster removal.
733 * xfs_ifree_cluster() can lock the inode buffer before it locks the 729 * xfs_ifree_cluster() can lock the inode buffer before it locks the
734 * ip->i_lock, and we are doing the exact opposite here. As a result, 730 * ip->i_lock, and we are doing the exact opposite here. As a result,
735 * doing a blocking xfs_itobp() to get the cluster buffer will result 731 * doing a blocking xfs_itobp() to get the cluster buffer would result
736 * in an ABBA deadlock with xfs_ifree_cluster(). 732 * in an ABBA deadlock with xfs_ifree_cluster().
737 * 733 *
738 * As xfs_ifree_cluser() must gather all inodes that are active in the 734 * As xfs_ifree_cluser() must gather all inodes that are active in the
739 * cache to mark them stale, if we hit this case we don't actually want 735 * cache to mark them stale, if we hit this case we don't actually want
740 * to do IO here - we want the inode marked stale so we can simply 736 * to do IO here - we want the inode marked stale so we can simply
741 * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush, 737 * reclaim it. Hence if we get an EAGAIN error here, just unlock the
742 * just unlock the inode, back off and try again. Hopefully the next 738 * inode, back off and try again. Hopefully the next pass through will
743 * pass through will see the stale flag set on the inode. 739 * see the stale flag set on the inode.
744 */ 740 */
745 error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode); 741 error = xfs_iflush(ip, &bp);
746 if (error == EAGAIN) { 742 if (error == EAGAIN) {
747 xfs_iunlock(ip, XFS_ILOCK_EXCL); 743 xfs_iunlock(ip, XFS_ILOCK_EXCL);
748 /* backoff longer than in xfs_ifree_cluster */ 744 /* backoff longer than in xfs_ifree_cluster */
749 delay(2); 745 delay(2);
750 goto restart; 746 goto restart;
751 } 747 }
752 xfs_iflock(ip);
753 748
749 if (!error) {
750 error = xfs_bwrite(bp);
751 xfs_buf_relse(bp);
752 }
753
754 xfs_iflock(ip);
754reclaim: 755reclaim:
755 xfs_ifunlock(ip); 756 xfs_ifunlock(ip);
756 xfs_iunlock(ip, XFS_ILOCK_EXCL); 757 xfs_iunlock(ip, XFS_ILOCK_EXCL);