diff options
author | Dave Chinner <dchinner@redhat.com> | 2013-01-21 07:53:55 -0500 |
---|---|---|
committer | Ben Myers <bpm@sgi.com> | 2013-01-26 10:34:38 -0500 |
commit | 3b19034d4f4554e39ca244fb28962bbf2ccba046 (patch) | |
tree | f37768d858c67d8e982d064d04fa2517914d6778 /fs/xfs | |
parent | 4d559a3bcb7383f34334092af07e68fb60910684 (diff) |
xfs: fix shutdown hang on invalid inode during create
When the new inode verification in xfs_iread() fails, the create
transaction is aborted and a shutdown occurs. The subsequent unmount
then hangs in xfs_wait_buftarg() on a buffer that has an elevated
hold count. Debugging showed that it was an AGI buffer getting stuck:
[ 22.576147] XFS (vdb): buffer 0x2/0x1, hold 0x2 stuck
[ 22.976213] XFS (vdb): buffer 0x2/0x1, hold 0x2 stuck
[ 23.376206] XFS (vdb): buffer 0x2/0x1, hold 0x2 stuck
[ 23.776325] XFS (vdb): buffer 0x2/0x1, hold 0x2 stuck
The trace of this buffer leading up to the shutdown (trimmed for
brevity) looks like:
xfs_buf_init: bno 0x2 nblks 0x1 hold 1 caller xfs_buf_get_map
xfs_buf_get: bno 0x2 len 0x200 hold 1 caller xfs_buf_read_map
xfs_buf_read: bno 0x2 len 0x200 hold 1 caller xfs_trans_read_buf_map
xfs_buf_iorequest: bno 0x2 nblks 0x1 hold 1 caller _xfs_buf_read
xfs_buf_hold: bno 0x2 nblks 0x1 hold 1 caller xfs_buf_iorequest
xfs_buf_rele: bno 0x2 nblks 0x1 hold 2 caller xfs_buf_iorequest
xfs_buf_iowait: bno 0x2 nblks 0x1 hold 1 caller _xfs_buf_read
xfs_buf_ioerror: bno 0x2 len 0x200 hold 1 caller xfs_buf_bio_end_io
xfs_buf_iodone: bno 0x2 nblks 0x1 hold 1 caller _xfs_buf_ioend
xfs_buf_iowait_done: bno 0x2 nblks 0x1 hold 1 caller _xfs_buf_read
xfs_buf_hold: bno 0x2 nblks 0x1 hold 1 caller xfs_buf_item_init
xfs_trans_read_buf: bno 0x2 len 0x200 hold 2 recur 0 refcount 1
xfs_trans_brelse: bno 0x2 len 0x200 hold 2 recur 0 refcount 1
xfs_buf_item_relse: bno 0x2 nblks 0x1 hold 2 caller xfs_trans_brelse
xfs_buf_rele: bno 0x2 nblks 0x1 hold 2 caller xfs_buf_item_relse
xfs_buf_unlock: bno 0x2 nblks 0x1 hold 1 caller xfs_trans_brelse
xfs_buf_rele: bno 0x2 nblks 0x1 hold 1 caller xfs_trans_brelse
xfs_buf_trylock: bno 0x2 nblks 0x1 hold 2 caller _xfs_buf_find
xfs_buf_find: bno 0x2 len 0x200 hold 2 caller xfs_buf_get_map
xfs_buf_get: bno 0x2 len 0x200 hold 2 caller xfs_buf_read_map
xfs_buf_read: bno 0x2 len 0x200 hold 2 caller xfs_trans_read_buf_map
xfs_buf_hold: bno 0x2 nblks 0x1 hold 2 caller xfs_buf_item_init
xfs_trans_read_buf: bno 0x2 len 0x200 hold 3 recur 0 refcount 1
xfs_trans_log_buf: bno 0x2 len 0x200 hold 3 recur 0 refcount 1
xfs_buf_item_unlock: bno 0x2 len 0x200 hold 3 flags DIRTY liflags ABORTED
xfs_buf_unlock: bno 0x2 nblks 0x1 hold 3 caller xfs_buf_item_unlock
xfs_buf_rele: bno 0x2 nblks 0x1 hold 3 caller xfs_buf_item_unlock
That is the life of the AGI buffer, from the cold-cache read into
memory through to the transaction abort. You can see that at transaction
abort the bli (buffer log item) is dirty and has only a single reference.
The item is not pinned, and it's not in the AIL. Hence the only reference
to it is the one held by this transaction.
The problem is that the xfs_buf_item_unlock() call is dropping the
last reference to the xfs_buf_log_item attached to the buffer (which
holds a reference to the buffer), but it is not freeing the
xfs_buf_log_item. Hence nothing will ever release the buffer, and
the unmount hangs waiting for this reference to go away.
The fix is simple - xfs_buf_item_unlock() needs to detect the last
reference going away in this case and free the xfs_buf_log_item, which
releases the reference it holds on the buffer.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Ben Myers <bpm@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r-- | fs/xfs/xfs_buf.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_buf_item.c | 12 | ||||
-rw-r--r-- | fs/xfs/xfs_trace.h | 1 |
3 files changed, 13 insertions, 2 deletions
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 689d72655ea6..fbbb9eb92e32 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
@@ -1505,6 +1505,8 @@ restart: | |||
1505 | while (!list_empty(&btp->bt_lru)) { | 1505 | while (!list_empty(&btp->bt_lru)) { |
1506 | bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); | 1506 | bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); |
1507 | if (atomic_read(&bp->b_hold) > 1) { | 1507 | if (atomic_read(&bp->b_hold) > 1) { |
1508 | trace_xfs_buf_wait_buftarg(bp, _RET_IP_); | ||
1509 | list_move_tail(&bp->b_lru, &btp->bt_lru); | ||
1508 | spin_unlock(&btp->bt_lru_lock); | 1510 | spin_unlock(&btp->bt_lru_lock); |
1509 | delay(100); | 1511 | delay(100); |
1510 | goto restart; | 1512 | goto restart; |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 63c86c48014b..9c4c050e4270 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -548,7 +548,10 @@ xfs_buf_item_unlock( | |||
548 | 548 | ||
549 | /* | 549 | /* |
550 | * If the buf item isn't tracking any data, free it, otherwise drop the | 550 | * If the buf item isn't tracking any data, free it, otherwise drop the |
551 | * reference we hold to it. | 551 | * reference we hold to it. If we are aborting the transaction, this may |
552 | * be the only reference to the buf item, so we free it anyway | ||
553 | * regardless of whether it is dirty or not. A dirty abort implies a | ||
554 | * shutdown, anyway. | ||
552 | */ | 555 | */ |
553 | clean = 1; | 556 | clean = 1; |
554 | for (i = 0; i < bip->bli_format_count; i++) { | 557 | for (i = 0; i < bip->bli_format_count; i++) { |
@@ -560,7 +563,12 @@ xfs_buf_item_unlock( | |||
560 | } | 563 | } |
561 | if (clean) | 564 | if (clean) |
562 | xfs_buf_item_relse(bp); | 565 | xfs_buf_item_relse(bp); |
563 | else | 566 | else if (aborted) { |
567 | if (atomic_dec_and_test(&bip->bli_refcount)) { | ||
568 | ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp)); | ||
569 | xfs_buf_item_relse(bp); | ||
570 | } | ||
571 | } else | ||
564 | atomic_dec(&bip->bli_refcount); | 572 | atomic_dec(&bip->bli_refcount); |
565 | 573 | ||
566 | if (!hold) | 574 | if (!hold) |
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 2e137d4a85ae..16a812977eab 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
@@ -341,6 +341,7 @@ DEFINE_BUF_EVENT(xfs_buf_item_relse); | |||
341 | DEFINE_BUF_EVENT(xfs_buf_item_iodone); | 341 | DEFINE_BUF_EVENT(xfs_buf_item_iodone); |
342 | DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); | 342 | DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); |
343 | DEFINE_BUF_EVENT(xfs_buf_error_relse); | 343 | DEFINE_BUF_EVENT(xfs_buf_error_relse); |
344 | DEFINE_BUF_EVENT(xfs_buf_wait_buftarg); | ||
344 | DEFINE_BUF_EVENT(xfs_trans_read_buf_io); | 345 | DEFINE_BUF_EVENT(xfs_trans_read_buf_io); |
345 | DEFINE_BUF_EVENT(xfs_trans_read_buf_shut); | 346 | DEFINE_BUF_EVENT(xfs_trans_read_buf_shut); |
346 | 347 | ||