diff options
| author | Dave Chinner <dchinner@redhat.com> | 2013-01-21 07:53:55 -0500 |
|---|---|---|
| committer | Ben Myers <bpm@sgi.com> | 2013-01-28 13:51:12 -0500 |
| commit | 9f87832a82923943aaab38b8d53658af134bbfa4 (patch) | |
| tree | a777e8186956f3018347710cfa03313bb6f3fd4f /fs/xfs | |
| parent | f2a459565b02b60408f3f2e5ca992a031319712b (diff) | |
xfs: fix shutdown hang on invalid inode during create
When the new inode verify in xfs_iread() fails, the create
transaction is aborted and a shutdown occurs. The subsequent unmount
then hangs in xfs_wait_buftarg() on a buffer that has an elevated
hold count. Debug showed that it was an AGI buffer getting stuck:
[ 22.576147] XFS (vdb): buffer 0x2/0x1, hold 0x2 stuck
[ 22.976213] XFS (vdb): buffer 0x2/0x1, hold 0x2 stuck
[ 23.376206] XFS (vdb): buffer 0x2/0x1, hold 0x2 stuck
[ 23.776325] XFS (vdb): buffer 0x2/0x1, hold 0x2 stuck
The trace of this buffer leading up to the shutdown (trimmed for
brevity) looks like:
xfs_buf_init: bno 0x2 nblks 0x1 hold 1 caller xfs_buf_get_map
xfs_buf_get: bno 0x2 len 0x200 hold 1 caller xfs_buf_read_map
xfs_buf_read: bno 0x2 len 0x200 hold 1 caller xfs_trans_read_buf_map
xfs_buf_iorequest: bno 0x2 nblks 0x1 hold 1 caller _xfs_buf_read
xfs_buf_hold: bno 0x2 nblks 0x1 hold 1 caller xfs_buf_iorequest
xfs_buf_rele: bno 0x2 nblks 0x1 hold 2 caller xfs_buf_iorequest
xfs_buf_iowait: bno 0x2 nblks 0x1 hold 1 caller _xfs_buf_read
xfs_buf_ioerror: bno 0x2 len 0x200 hold 1 caller xfs_buf_bio_end_io
xfs_buf_iodone: bno 0x2 nblks 0x1 hold 1 caller _xfs_buf_ioend
xfs_buf_iowait_done: bno 0x2 nblks 0x1 hold 1 caller _xfs_buf_read
xfs_buf_hold: bno 0x2 nblks 0x1 hold 1 caller xfs_buf_item_init
xfs_trans_read_buf: bno 0x2 len 0x200 hold 2 recur 0 refcount 1
xfs_trans_brelse: bno 0x2 len 0x200 hold 2 recur 0 refcount 1
xfs_buf_item_relse: bno 0x2 nblks 0x1 hold 2 caller xfs_trans_brelse
xfs_buf_rele: bno 0x2 nblks 0x1 hold 2 caller xfs_buf_item_relse
xfs_buf_unlock: bno 0x2 nblks 0x1 hold 1 caller xfs_trans_brelse
xfs_buf_rele: bno 0x2 nblks 0x1 hold 1 caller xfs_trans_brelse
xfs_buf_trylock: bno 0x2 nblks 0x1 hold 2 caller _xfs_buf_find
xfs_buf_find: bno 0x2 len 0x200 hold 2 caller xfs_buf_get_map
xfs_buf_get: bno 0x2 len 0x200 hold 2 caller xfs_buf_read_map
xfs_buf_read: bno 0x2 len 0x200 hold 2 caller xfs_trans_read_buf_map
xfs_buf_hold: bno 0x2 nblks 0x1 hold 2 caller xfs_buf_item_init
xfs_trans_read_buf: bno 0x2 len 0x200 hold 3 recur 0 refcount 1
xfs_trans_log_buf: bno 0x2 len 0x200 hold 3 recur 0 refcount 1
xfs_buf_item_unlock: bno 0x2 len 0x200 hold 3 flags DIRTY liflags ABORTED
xfs_buf_unlock: bno 0x2 nblks 0x1 hold 3 caller xfs_buf_item_unlock
xfs_buf_rele: bno 0x2 nblks 0x1 hold 3 caller xfs_buf_item_unlock
And that is the AGI buffer from cold cache read into memory to
transaction abort. You can see at transaction abort the bli is dirty
and only has a single reference. The item is not pinned, and it's
not in the AIL. Hence the only reference to it is this transaction.
The problem is that the xfs_buf_item_unlock() call is dropping the
last reference to the xfs_buf_log_item attached to the buffer (which
holds a reference to the buffer), but it is not freeing the
xfs_buf_log_item. Hence nothing will ever release the buffer, and
the unmount hangs waiting for this reference to go away.
The fix is simple - xfs_buf_item_unlock needs to detect the last
reference going away in this case and free the xfs_buf_log_item to
release the reference it holds on the buffer.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Ben Myers <bpm@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
Diffstat (limited to 'fs/xfs')
| -rw-r--r-- | fs/xfs/xfs_buf.c | 2 | ||||
| -rw-r--r-- | fs/xfs/xfs_buf_item.c | 12 | ||||
| -rw-r--r-- | fs/xfs/xfs_trace.h | 1 |
3 files changed, 13 insertions, 2 deletions
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 689d72655ea6..fbbb9eb92e32 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
| @@ -1505,6 +1505,8 @@ restart: | |||
| 1505 | while (!list_empty(&btp->bt_lru)) { | 1505 | while (!list_empty(&btp->bt_lru)) { |
| 1506 | bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); | 1506 | bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); |
| 1507 | if (atomic_read(&bp->b_hold) > 1) { | 1507 | if (atomic_read(&bp->b_hold) > 1) { |
| 1508 | trace_xfs_buf_wait_buftarg(bp, _RET_IP_); | ||
| 1509 | list_move_tail(&bp->b_lru, &btp->bt_lru); | ||
| 1508 | spin_unlock(&btp->bt_lru_lock); | 1510 | spin_unlock(&btp->bt_lru_lock); |
| 1509 | delay(100); | 1511 | delay(100); |
| 1510 | goto restart; | 1512 | goto restart; |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 77b09750e92c..3f9949fee391 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
| @@ -652,7 +652,10 @@ xfs_buf_item_unlock( | |||
| 652 | 652 | ||
| 653 | /* | 653 | /* |
| 654 | * If the buf item isn't tracking any data, free it, otherwise drop the | 654 | * If the buf item isn't tracking any data, free it, otherwise drop the |
| 655 | * reference we hold to it. | 655 | * reference we hold to it. If we are aborting the transaction, this may |
| 656 | * be the only reference to the buf item, so we free it anyway | ||
| 657 | * regardless of whether it is dirty or not. A dirty abort implies a | ||
| 658 | * shutdown, anyway. | ||
| 656 | */ | 659 | */ |
| 657 | clean = 1; | 660 | clean = 1; |
| 658 | for (i = 0; i < bip->bli_format_count; i++) { | 661 | for (i = 0; i < bip->bli_format_count; i++) { |
| @@ -664,7 +667,12 @@ xfs_buf_item_unlock( | |||
| 664 | } | 667 | } |
| 665 | if (clean) | 668 | if (clean) |
| 666 | xfs_buf_item_relse(bp); | 669 | xfs_buf_item_relse(bp); |
| 667 | else | 670 | else if (aborted) { |
| 671 | if (atomic_dec_and_test(&bip->bli_refcount)) { | ||
| 672 | ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp)); | ||
| 673 | xfs_buf_item_relse(bp); | ||
| 674 | } | ||
| 675 | } else | ||
| 668 | atomic_dec(&bip->bli_refcount); | 676 | atomic_dec(&bip->bli_refcount); |
| 669 | 677 | ||
| 670 | if (!hold) | 678 | if (!hold) |
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 2e137d4a85ae..16a812977eab 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
| @@ -341,6 +341,7 @@ DEFINE_BUF_EVENT(xfs_buf_item_relse); | |||
| 341 | DEFINE_BUF_EVENT(xfs_buf_item_iodone); | 341 | DEFINE_BUF_EVENT(xfs_buf_item_iodone); |
| 342 | DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); | 342 | DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); |
| 343 | DEFINE_BUF_EVENT(xfs_buf_error_relse); | 343 | DEFINE_BUF_EVENT(xfs_buf_error_relse); |
| 344 | DEFINE_BUF_EVENT(xfs_buf_wait_buftarg); | ||
| 344 | DEFINE_BUF_EVENT(xfs_trans_read_buf_io); | 345 | DEFINE_BUF_EVENT(xfs_trans_read_buf_io); |
| 345 | DEFINE_BUF_EVENT(xfs_trans_read_buf_shut); | 346 | DEFINE_BUF_EVENT(xfs_trans_read_buf_shut); |
| 346 | 347 | ||
