diff options
author | Christoph Hellwig <hch@infradead.org> | 2012-04-23 01:58:36 -0400 |
---|---|---|
committer | Ben Myers <bpm@sgi.com> | 2012-05-14 17:20:28 -0400 |
commit | 4c46819a8097a75d3b378c5e56d2bcf47bb7408d (patch) | |
tree | 031f84bd94f044218d43ef3d11f90df0480513c6 /fs/xfs | |
parent | 8a48088f6439249019b5e17f6391e710656879d9 (diff) |
xfs: do not write the buffer from xfs_iflush
Instead of writing the buffer directly from inside xfs_iflush return it to
the caller and let the caller decide what to do with the buffer. Also
remove the pincount check in xfs_iflush that all non-blocking callers already
implement and the now unused flags parameter.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r-- | fs/xfs/xfs_inode.c | 54 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_inode_item.c | 17 | ||||
-rw-r--r-- | fs/xfs/xfs_sync.c | 29 |
4 files changed, 48 insertions, 54 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 00f9c2f34e1f..0fa987dea242 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -2384,22 +2384,22 @@ cluster_corrupt_out: | |||
2384 | } | 2384 | } |
2385 | 2385 | ||
2386 | /* | 2386 | /* |
2387 | * xfs_iflush() will write a modified inode's changes out to the | 2387 | * Flush dirty inode metadata into the backing buffer. |
2388 | * inode's on disk home. The caller must have the inode lock held | 2388 | * |
2389 | * in at least shared mode and the inode flush completion must be | 2389 | * The caller must have the inode lock and the inode flush lock held. The |
2390 | * active as well. The inode lock will still be held upon return from | 2390 | * inode lock will still be held upon return to the caller, and the inode |
2391 | * the call and the caller is free to unlock it. | 2391 | * flush lock will be released after the inode has reached the disk. |
2392 | * The inode flush will be completed when the inode reaches the disk. | 2392 | * |
2393 | * The flags indicate how the inode's buffer should be written out. | 2393 | * The caller must write out the buffer returned in *bpp and release it. |
2394 | */ | 2394 | */ |
2395 | int | 2395 | int |
2396 | xfs_iflush( | 2396 | xfs_iflush( |
2397 | xfs_inode_t *ip, | 2397 | struct xfs_inode *ip, |
2398 | uint flags) | 2398 | struct xfs_buf **bpp) |
2399 | { | 2399 | { |
2400 | xfs_buf_t *bp; | 2400 | struct xfs_mount *mp = ip->i_mount; |
2401 | xfs_dinode_t *dip; | 2401 | struct xfs_buf *bp; |
2402 | xfs_mount_t *mp; | 2402 | struct xfs_dinode *dip; |
2403 | int error; | 2403 | int error; |
2404 | 2404 | ||
2405 | XFS_STATS_INC(xs_iflush_count); | 2405 | XFS_STATS_INC(xs_iflush_count); |
@@ -2409,24 +2409,8 @@ xfs_iflush( | |||
2409 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || | 2409 | ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || |
2410 | ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); | 2410 | ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK)); |
2411 | 2411 | ||
2412 | mp = ip->i_mount; | 2412 | *bpp = NULL; |
2413 | 2413 | ||
2414 | /* | ||
2415 | * We can't flush the inode until it is unpinned, so wait for it if we | ||
2416 | * are allowed to block. We know no one new can pin it, because we are | ||
2417 | * holding the inode lock shared and you need to hold it exclusively to | ||
2418 | * pin the inode. | ||
2419 | * | ||
2420 | * If we are not allowed to block, force the log out asynchronously so | ||
2421 | * that when we come back the inode will be unpinned. If other inodes | ||
2422 | * in the same cluster are dirty, they will probably write the inode | ||
2423 | * out for us if they occur after the log force completes. | ||
2424 | */ | ||
2425 | if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) { | ||
2426 | xfs_iunpin(ip); | ||
2427 | xfs_ifunlock(ip); | ||
2428 | return EAGAIN; | ||
2429 | } | ||
2430 | xfs_iunpin_wait(ip); | 2414 | xfs_iunpin_wait(ip); |
2431 | 2415 | ||
2432 | /* | 2416 | /* |
@@ -2458,8 +2442,7 @@ xfs_iflush( | |||
2458 | /* | 2442 | /* |
2459 | * Get the buffer containing the on-disk inode. | 2443 | * Get the buffer containing the on-disk inode. |
2460 | */ | 2444 | */ |
2461 | error = xfs_itobp(mp, NULL, ip, &dip, &bp, | 2445 | error = xfs_itobp(mp, NULL, ip, &dip, &bp, XBF_TRYLOCK); |
2462 | (flags & SYNC_TRYLOCK) ? XBF_TRYLOCK : XBF_LOCK); | ||
2463 | if (error || !bp) { | 2446 | if (error || !bp) { |
2464 | xfs_ifunlock(ip); | 2447 | xfs_ifunlock(ip); |
2465 | return error; | 2448 | return error; |
@@ -2487,13 +2470,8 @@ xfs_iflush( | |||
2487 | if (error) | 2470 | if (error) |
2488 | goto cluster_corrupt_out; | 2471 | goto cluster_corrupt_out; |
2489 | 2472 | ||
2490 | if (flags & SYNC_WAIT) | 2473 | *bpp = bp; |
2491 | error = xfs_bwrite(bp); | 2474 | return 0; |
2492 | else | ||
2493 | xfs_buf_delwri_queue(bp); | ||
2494 | |||
2495 | xfs_buf_relse(bp); | ||
2496 | return error; | ||
2497 | 2475 | ||
2498 | corrupt_out: | 2476 | corrupt_out: |
2499 | xfs_buf_relse(bp); | 2477 | xfs_buf_relse(bp); |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 7fee3387e1c8..a2fa79ae410f 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -529,7 +529,7 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); | |||
529 | 529 | ||
530 | void xfs_iext_realloc(xfs_inode_t *, int, int); | 530 | void xfs_iext_realloc(xfs_inode_t *, int, int); |
531 | void xfs_iunpin_wait(xfs_inode_t *); | 531 | void xfs_iunpin_wait(xfs_inode_t *); |
532 | int xfs_iflush(xfs_inode_t *, uint); | 532 | int xfs_iflush(struct xfs_inode *, struct xfs_buf **); |
533 | void xfs_promote_inode(struct xfs_inode *); | 533 | void xfs_promote_inode(struct xfs_inode *); |
534 | void xfs_lock_inodes(xfs_inode_t **, int, uint); | 534 | void xfs_lock_inodes(xfs_inode_t **, int, uint); |
535 | void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); | 535 | void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 05d924efceaf..d3601ab75dd3 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -506,6 +506,15 @@ xfs_inode_item_trylock( | |||
506 | if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) | 506 | if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) |
507 | return XFS_ITEM_LOCKED; | 507 | return XFS_ITEM_LOCKED; |
508 | 508 | ||
509 | /* | ||
510 | * Re-check the pincount now that we stabilized the value by | ||
511 | * taking the ilock. | ||
512 | */ | ||
513 | if (xfs_ipincount(ip) > 0) { | ||
514 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
515 | return XFS_ITEM_PINNED; | ||
516 | } | ||
517 | |||
509 | if (!xfs_iflock_nowait(ip)) { | 518 | if (!xfs_iflock_nowait(ip)) { |
510 | /* | 519 | /* |
511 | * inode has already been flushed to the backing buffer, | 520 | * inode has already been flushed to the backing buffer, |
@@ -666,6 +675,8 @@ xfs_inode_item_push( | |||
666 | { | 675 | { |
667 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); | 676 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); |
668 | struct xfs_inode *ip = iip->ili_inode; | 677 | struct xfs_inode *ip = iip->ili_inode; |
678 | struct xfs_buf *bp = NULL; | ||
679 | int error; | ||
669 | 680 | ||
670 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); | 681 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); |
671 | ASSERT(xfs_isiflocked(ip)); | 682 | ASSERT(xfs_isiflocked(ip)); |
@@ -689,7 +700,11 @@ xfs_inode_item_push( | |||
689 | * will pull the inode from the AIL, mark it clean and unlock the flush | 700 | * will pull the inode from the AIL, mark it clean and unlock the flush |
690 | * lock. | 701 | * lock. |
691 | */ | 702 | */ |
692 | (void) xfs_iflush(ip, SYNC_TRYLOCK); | 703 | error = xfs_iflush(ip, &bp); |
704 | if (!error) { | ||
705 | xfs_buf_delwri_queue(bp); | ||
706 | xfs_buf_relse(bp); | ||
707 | } | ||
693 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 708 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
694 | } | 709 | } |
695 | 710 | ||
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c index 7b2bccc4d67b..468c3c0a4f9f 100644 --- a/fs/xfs/xfs_sync.c +++ b/fs/xfs/xfs_sync.c | |||
@@ -648,10 +648,6 @@ xfs_reclaim_inode_grab( | |||
648 | * (*) dgc: I don't think the clean, pinned state is possible but it gets | 648 | * (*) dgc: I don't think the clean, pinned state is possible but it gets |
649 | * handled anyway given the order of checks implemented. | 649 | * handled anyway given the order of checks implemented. |
650 | * | 650 | * |
651 | * As can be seen from the table, the return value of xfs_iflush() is not | ||
652 | * sufficient to correctly decide the reclaim action here. The checks in | ||
653 | * xfs_iflush() might look like duplicates, but they are not. | ||
654 | * | ||
655 | * Also, because we get the flush lock first, we know that any inode that has | 651 | * Also, because we get the flush lock first, we know that any inode that has |
656 | * been flushed delwri has had the flush completed by the time we check that | 652 | * been flushed delwri has had the flush completed by the time we check that |
657 | * the inode is clean. | 653 | * the inode is clean. |
@@ -679,7 +675,8 @@ xfs_reclaim_inode( | |||
679 | struct xfs_perag *pag, | 675 | struct xfs_perag *pag, |
680 | int sync_mode) | 676 | int sync_mode) |
681 | { | 677 | { |
682 | int error; | 678 | struct xfs_buf *bp = NULL; |
679 | int error; | ||
683 | 680 | ||
684 | restart: | 681 | restart: |
685 | error = 0; | 682 | error = 0; |
@@ -728,29 +725,33 @@ restart: | |||
728 | /* | 725 | /* |
729 | * Now we have an inode that needs flushing. | 726 | * Now we have an inode that needs flushing. |
730 | * | 727 | * |
731 | * We do a nonblocking flush here even if we are doing a SYNC_WAIT | 728 | * Note that xfs_iflush will never block on the inode buffer lock, as |
732 | * reclaim as we can deadlock with inode cluster removal. | ||
733 | * xfs_ifree_cluster() can lock the inode buffer before it locks the | 729 | * xfs_ifree_cluster() can lock the inode buffer before it locks the |
734 | * ip->i_lock, and we are doing the exact opposite here. As a result, | 730 | * ip->i_lock, and we are doing the exact opposite here. As a result, |
735 | * doing a blocking xfs_itobp() to get the cluster buffer will result | 731 | * doing a blocking xfs_itobp() to get the cluster buffer would result |
736 | * in an ABBA deadlock with xfs_ifree_cluster(). | 732 | * in an ABBA deadlock with xfs_ifree_cluster(). |
737 | * | 733 | * |
738 | * As xfs_ifree_cluser() must gather all inodes that are active in the | 734 | * As xfs_ifree_cluser() must gather all inodes that are active in the |
739 | * cache to mark them stale, if we hit this case we don't actually want | 735 | * cache to mark them stale, if we hit this case we don't actually want |
740 | * to do IO here - we want the inode marked stale so we can simply | 736 | * to do IO here - we want the inode marked stale so we can simply |
741 | * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush, | 737 | * reclaim it. Hence if we get an EAGAIN error here, just unlock the |
742 | * just unlock the inode, back off and try again. Hopefully the next | 738 | * inode, back off and try again. Hopefully the next pass through will |
743 | * pass through will see the stale flag set on the inode. | 739 | * see the stale flag set on the inode. |
744 | */ | 740 | */ |
745 | error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode); | 741 | error = xfs_iflush(ip, &bp); |
746 | if (error == EAGAIN) { | 742 | if (error == EAGAIN) { |
747 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 743 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
748 | /* backoff longer than in xfs_ifree_cluster */ | 744 | /* backoff longer than in xfs_ifree_cluster */ |
749 | delay(2); | 745 | delay(2); |
750 | goto restart; | 746 | goto restart; |
751 | } | 747 | } |
752 | xfs_iflock(ip); | ||
753 | 748 | ||
749 | if (!error) { | ||
750 | error = xfs_bwrite(bp); | ||
751 | xfs_buf_relse(bp); | ||
752 | } | ||
753 | |||
754 | xfs_iflock(ip); | ||
754 | reclaim: | 755 | reclaim: |
755 | xfs_ifunlock(ip); | 756 | xfs_ifunlock(ip); |
756 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 757 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |