diff options
author | Dave Chinner <dchinner@redhat.com> | 2013-01-21 07:53:55 -0500 |
---|---|---|
committer | Ben Myers <bpm@sgi.com> | 2013-01-28 13:51:12 -0500 |
commit | 9f87832a82923943aaab38b8d53658af134bbfa4 (patch) | |
tree | a777e8186956f3018347710cfa03313bb6f3fd4f /fs | |
parent | f2a459565b02b60408f3f2e5ca992a031319712b (diff) |
xfs: fix shutdown hang on invalid inode during create
When the new inode verification in xfs_iread() fails, the create
transaction is aborted and a shutdown occurs. The subsequent unmount
then hangs in xfs_wait_buftarg() on a buffer that has an elevated
hold count. Debugging showed that it was an AGI buffer getting stuck:
[ 22.576147] XFS (vdb): buffer 0x2/0x1, hold 0x2 stuck
[ 22.976213] XFS (vdb): buffer 0x2/0x1, hold 0x2 stuck
[ 23.376206] XFS (vdb): buffer 0x2/0x1, hold 0x2 stuck
[ 23.776325] XFS (vdb): buffer 0x2/0x1, hold 0x2 stuck
The trace of this buffer leading up to the shutdown (trimmed for
brevity) looks like:
xfs_buf_init: bno 0x2 nblks 0x1 hold 1 caller xfs_buf_get_map
xfs_buf_get: bno 0x2 len 0x200 hold 1 caller xfs_buf_read_map
xfs_buf_read: bno 0x2 len 0x200 hold 1 caller xfs_trans_read_buf_map
xfs_buf_iorequest: bno 0x2 nblks 0x1 hold 1 caller _xfs_buf_read
xfs_buf_hold: bno 0x2 nblks 0x1 hold 1 caller xfs_buf_iorequest
xfs_buf_rele: bno 0x2 nblks 0x1 hold 2 caller xfs_buf_iorequest
xfs_buf_iowait: bno 0x2 nblks 0x1 hold 1 caller _xfs_buf_read
xfs_buf_ioerror: bno 0x2 len 0x200 hold 1 caller xfs_buf_bio_end_io
xfs_buf_iodone: bno 0x2 nblks 0x1 hold 1 caller _xfs_buf_ioend
xfs_buf_iowait_done: bno 0x2 nblks 0x1 hold 1 caller _xfs_buf_read
xfs_buf_hold: bno 0x2 nblks 0x1 hold 1 caller xfs_buf_item_init
xfs_trans_read_buf: bno 0x2 len 0x200 hold 2 recur 0 refcount 1
xfs_trans_brelse: bno 0x2 len 0x200 hold 2 recur 0 refcount 1
xfs_buf_item_relse: bno 0x2 nblks 0x1 hold 2 caller xfs_trans_brelse
xfs_buf_rele: bno 0x2 nblks 0x1 hold 2 caller xfs_buf_item_relse
xfs_buf_unlock: bno 0x2 nblks 0x1 hold 1 caller xfs_trans_brelse
xfs_buf_rele: bno 0x2 nblks 0x1 hold 1 caller xfs_trans_brelse
xfs_buf_trylock: bno 0x2 nblks 0x1 hold 2 caller _xfs_buf_find
xfs_buf_find: bno 0x2 len 0x200 hold 2 caller xfs_buf_get_map
xfs_buf_get: bno 0x2 len 0x200 hold 2 caller xfs_buf_read_map
xfs_buf_read: bno 0x2 len 0x200 hold 2 caller xfs_trans_read_buf_map
xfs_buf_hold: bno 0x2 nblks 0x1 hold 2 caller xfs_buf_item_init
xfs_trans_read_buf: bno 0x2 len 0x200 hold 3 recur 0 refcount 1
xfs_trans_log_buf: bno 0x2 len 0x200 hold 3 recur 0 refcount 1
xfs_buf_item_unlock: bno 0x2 len 0x200 hold 3 flags DIRTY liflags ABORTED
xfs_buf_unlock: bno 0x2 nblks 0x1 hold 3 caller xfs_buf_item_unlock
xfs_buf_rele: bno 0x2 nblks 0x1 hold 3 caller xfs_buf_item_unlock
That is the life of the AGI buffer, from the cold cache read into memory
through to transaction abort. You can see that at transaction abort the bli
is dirty and has only a single reference. The item is not pinned, and it's
not in the AIL. Hence the only reference to it is this transaction.
The problem is that the xfs_buf_item_unlock() call is dropping the
last reference to the xfs_buf_log_item attached to the buffer (which
holds a reference to the buffer), but it is not freeing the
xfs_buf_log_item. Hence nothing will ever release the buffer, and
the unmount hangs waiting for this reference to go away.
The fix is simple - xfs_buf_item_unlock() needs to detect the last
reference going away in this case and free the xfs_buf_log_item to
release the reference it holds on the buffer.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Ben Myers <bpm@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/xfs/xfs_buf.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_buf_item.c | 12 | ||||
-rw-r--r-- | fs/xfs/xfs_trace.h | 1 |
3 files changed, 13 insertions, 2 deletions
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 689d72655ea6..fbbb9eb92e32 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
@@ -1505,6 +1505,8 @@ restart: | |||
1505 | while (!list_empty(&btp->bt_lru)) { | 1505 | while (!list_empty(&btp->bt_lru)) { |
1506 | bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); | 1506 | bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); |
1507 | if (atomic_read(&bp->b_hold) > 1) { | 1507 | if (atomic_read(&bp->b_hold) > 1) { |
1508 | trace_xfs_buf_wait_buftarg(bp, _RET_IP_); | ||
1509 | list_move_tail(&bp->b_lru, &btp->bt_lru); | ||
1508 | spin_unlock(&btp->bt_lru_lock); | 1510 | spin_unlock(&btp->bt_lru_lock); |
1509 | delay(100); | 1511 | delay(100); |
1510 | goto restart; | 1512 | goto restart; |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 77b09750e92c..3f9949fee391 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -652,7 +652,10 @@ xfs_buf_item_unlock( | |||
652 | 652 | ||
653 | /* | 653 | /* |
654 | * If the buf item isn't tracking any data, free it, otherwise drop the | 654 | * If the buf item isn't tracking any data, free it, otherwise drop the |
655 | * reference we hold to it. | 655 | * reference we hold to it. If we are aborting the transaction, this may |
656 | * be the only reference to the buf item, so we free it anyway | ||
657 | * regardless of whether it is dirty or not. A dirty abort implies a | ||
658 | * shutdown, anyway. | ||
656 | */ | 659 | */ |
657 | clean = 1; | 660 | clean = 1; |
658 | for (i = 0; i < bip->bli_format_count; i++) { | 661 | for (i = 0; i < bip->bli_format_count; i++) { |
@@ -664,7 +667,12 @@ xfs_buf_item_unlock( | |||
664 | } | 667 | } |
665 | if (clean) | 668 | if (clean) |
666 | xfs_buf_item_relse(bp); | 669 | xfs_buf_item_relse(bp); |
667 | else | 670 | else if (aborted) { |
671 | if (atomic_dec_and_test(&bip->bli_refcount)) { | ||
672 | ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp)); | ||
673 | xfs_buf_item_relse(bp); | ||
674 | } | ||
675 | } else | ||
668 | atomic_dec(&bip->bli_refcount); | 676 | atomic_dec(&bip->bli_refcount); |
669 | 677 | ||
670 | if (!hold) | 678 | if (!hold) |
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 2e137d4a85ae..16a812977eab 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
@@ -341,6 +341,7 @@ DEFINE_BUF_EVENT(xfs_buf_item_relse); | |||
341 | DEFINE_BUF_EVENT(xfs_buf_item_iodone); | 341 | DEFINE_BUF_EVENT(xfs_buf_item_iodone); |
342 | DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); | 342 | DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); |
343 | DEFINE_BUF_EVENT(xfs_buf_error_relse); | 343 | DEFINE_BUF_EVENT(xfs_buf_error_relse); |
344 | DEFINE_BUF_EVENT(xfs_buf_wait_buftarg); | ||
344 | DEFINE_BUF_EVENT(xfs_trans_read_buf_io); | 345 | DEFINE_BUF_EVENT(xfs_trans_read_buf_io); |
345 | DEFINE_BUF_EVENT(xfs_trans_read_buf_shut); | 346 | DEFINE_BUF_EVENT(xfs_trans_read_buf_shut); |
346 | 347 | ||