diff options
author | Dave Chinner <dchinner@redhat.com> | 2011-01-26 20:13:35 -0500 |
---|---|---|
committer | Alex Elder <aelder@sgi.com> | 2011-01-28 10:01:33 -0500 |
commit | e34a314c5e49fe6b763568f6576b19f1299c33c2 (patch) | |
tree | 25cd4abc329c68cdb268ae527e2319204d223d58 | |
parent | 7db37c5e6575b229a5051be1d3ef15257ae0ba5d (diff) |
xfs: fix efi item leak on forced shutdown
After test 139, kmemleak shows:
unreferenced object 0xffff880078b405d8 (size 400):
comm "xfs_io", pid 4904, jiffies 4294909383 (age 1186.728s)
hex dump (first 32 bytes):
60 c1 17 79 00 88 ff ff 60 c1 17 79 00 88 ff ff `..y....`..y....
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
backtrace:
[<ffffffff81afb04d>] kmemleak_alloc+0x2d/0x60
[<ffffffff8115c6cf>] kmem_cache_alloc+0x13f/0x2b0
[<ffffffff814aaa97>] kmem_zone_alloc+0x77/0xf0
[<ffffffff814aab2e>] kmem_zone_zalloc+0x1e/0x50
[<ffffffff8147cd6b>] xfs_efi_init+0x4b/0xb0
[<ffffffff814a4ee8>] xfs_trans_get_efi+0x58/0x90
[<ffffffff81455fab>] xfs_bmap_finish+0x8b/0x1d0
[<ffffffff814851b4>] xfs_itruncate_finish+0x2c4/0x5d0
[<ffffffff814a970f>] xfs_setattr+0x8df/0xa70
[<ffffffff814b5c7b>] xfs_vn_setattr+0x1b/0x20
[<ffffffff8117dc00>] notify_change+0x170/0x2e0
[<ffffffff81163bf6>] do_truncate+0x66/0xa0
[<ffffffff81163d0b>] sys_ftruncate+0xdb/0xe0
[<ffffffff8103a002>] system_call_fastpath+0x16/0x1b
[<ffffffffffffffff>] 0xffffffffffffffff
The cause of the leak is that the "remove" parameter of IOP_UNPIN()
is never set when a CIL push is aborted. This means that the EFI
item is never freed if it was in the push being cancelled. The
problem is specific to delayed logging, but has uncovered a couple
of problems with the handling of IOP_UNPIN(remove).
Firstly, we cannot safely call xfs_trans_del_item() from IOP_UNPIN()
in the CIL commit failure path or the iclog write failure path
because for delayed loging we have no transaction context. Hence we
must only call xfs_trans_del_item() if the log item being unpinned
has an active log item descriptor.
Secondly, xfs_trans_uncommit() does not handle log item descriptor
freeing during the traversal of log items on a transaction. It can
reference a freed log item descriptor when unpinning an EFI item.
Hence it needs to use a safe list traversal method to allow items to
be removed from the transaction during IOP_UNPIN().
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Alex Elder <aelder@sgi.com>
-rw-r--r-- | fs/xfs/xfs_buf_item.c | 12 | ||||
-rw-r--r-- | fs/xfs/xfs_extfree_item.c | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_trans.c | 36 |
3 files changed, 38 insertions, 13 deletions
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 98c6f73b6752..6f8c21ce0d6d 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -427,13 +427,15 @@ xfs_buf_item_unpin( | |||
427 | 427 | ||
428 | if (remove) { | 428 | if (remove) { |
429 | /* | 429 | /* |
430 | * We have to remove the log item from the transaction | 430 | * If we are in a transaction context, we have to |
431 | * as we are about to release our reference to the | 431 | * remove the log item from the transaction as we are |
432 | * buffer. If we don't, the unlock that occurs later | 432 | * about to release our reference to the buffer. If we |
433 | * in xfs_trans_uncommit() will ry to reference the | 433 | * don't, the unlock that occurs later in |
434 | * xfs_trans_uncommit() will try to reference the | ||
434 | * buffer which we no longer have a hold on. | 435 | * buffer which we no longer have a hold on. |
435 | */ | 436 | */ |
436 | xfs_trans_del_item(lip); | 437 | if (lip->li_desc) |
438 | xfs_trans_del_item(lip); | ||
437 | 439 | ||
438 | /* | 440 | /* |
439 | * Since the transaction no longer refers to the buffer, | 441 | * Since the transaction no longer refers to the buffer, |
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 75f2ef60e579..d22e62623437 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c | |||
@@ -138,7 +138,8 @@ xfs_efi_item_unpin( | |||
138 | 138 | ||
139 | if (remove) { | 139 | if (remove) { |
140 | ASSERT(!(lip->li_flags & XFS_LI_IN_AIL)); | 140 | ASSERT(!(lip->li_flags & XFS_LI_IN_AIL)); |
141 | xfs_trans_del_item(lip); | 141 | if (lip->li_desc) |
142 | xfs_trans_del_item(lip); | ||
142 | xfs_efi_item_free(efip); | 143 | xfs_efi_item_free(efip); |
143 | return; | 144 | return; |
144 | } | 145 | } |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 33dbc4e0ad62..29f5e5424897 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -1446,6 +1446,14 @@ xfs_log_item_batch_insert( | |||
1446 | * Bulk operation version of xfs_trans_committed that takes a log vector of | 1446 | * Bulk operation version of xfs_trans_committed that takes a log vector of |
1447 | * items to insert into the AIL. This uses bulk AIL insertion techniques to | 1447 | * items to insert into the AIL. This uses bulk AIL insertion techniques to |
1448 | * minimise lock traffic. | 1448 | * minimise lock traffic. |
1449 | * | ||
1450 | * If we are called with the aborted flag set, it is because a log write during | ||
1451 | * a CIL checkpoint commit has failed. In this case, all the items in the | ||
1452 | * checkpoint have already gone through IOP_COMMITED and IOP_UNLOCK, which | ||
1453 | * means that checkpoint commit abort handling is treated exactly the same | ||
1454 | * as an iclog write error even though we haven't started any IO yet. Hence in | ||
1455 | * this case all we need to do is IOP_COMMITTED processing, followed by an | ||
1456 | * IOP_UNPIN(aborted) call. | ||
1449 | */ | 1457 | */ |
1450 | void | 1458 | void |
1451 | xfs_trans_committed_bulk( | 1459 | xfs_trans_committed_bulk( |
@@ -1472,6 +1480,16 @@ xfs_trans_committed_bulk( | |||
1472 | if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) | 1480 | if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) |
1473 | continue; | 1481 | continue; |
1474 | 1482 | ||
1483 | /* | ||
1484 | * if we are aborting the operation, no point in inserting the | ||
1485 | * object into the AIL as we are in a shutdown situation. | ||
1486 | */ | ||
1487 | if (aborted) { | ||
1488 | ASSERT(XFS_FORCED_SHUTDOWN(ailp->xa_mount)); | ||
1489 | IOP_UNPIN(lip, 1); | ||
1490 | continue; | ||
1491 | } | ||
1492 | |||
1475 | if (item_lsn != commit_lsn) { | 1493 | if (item_lsn != commit_lsn) { |
1476 | 1494 | ||
1477 | /* | 1495 | /* |
@@ -1503,20 +1521,24 @@ xfs_trans_committed_bulk( | |||
1503 | } | 1521 | } |
1504 | 1522 | ||
1505 | /* | 1523 | /* |
1506 | * Called from the trans_commit code when we notice that | 1524 | * Called from the trans_commit code when we notice that the filesystem is in |
1507 | * the filesystem is in the middle of a forced shutdown. | 1525 | * the middle of a forced shutdown. |
1526 | * | ||
1527 | * When we are called here, we have already pinned all the items in the | ||
1528 | * transaction. However, neither IOP_COMMITTING or IOP_UNLOCK has been called | ||
1529 | * so we can simply walk the items in the transaction, unpin them with an abort | ||
1530 | * flag and then free the items. Note that unpinning the items can result in | ||
1531 | * them being freed immediately, so we need to use a safe list traversal method | ||
1532 | * here. | ||
1508 | */ | 1533 | */ |
1509 | STATIC void | 1534 | STATIC void |
1510 | xfs_trans_uncommit( | 1535 | xfs_trans_uncommit( |
1511 | struct xfs_trans *tp, | 1536 | struct xfs_trans *tp, |
1512 | uint flags) | 1537 | uint flags) |
1513 | { | 1538 | { |
1514 | struct xfs_log_item_desc *lidp; | 1539 | struct xfs_log_item_desc *lidp, *n; |
1515 | 1540 | ||
1516 | list_for_each_entry(lidp, &tp->t_items, lid_trans) { | 1541 | list_for_each_entry_safe(lidp, n, &tp->t_items, lid_trans) { |
1517 | /* | ||
1518 | * Unpin all but those that aren't dirty. | ||
1519 | */ | ||
1520 | if (lidp->lid_flags & XFS_LID_DIRTY) | 1542 | if (lidp->lid_flags & XFS_LID_DIRTY) |
1521 | IOP_UNPIN(lidp->lid_item, 1); | 1543 | IOP_UNPIN(lidp->lid_item, 1); |
1522 | } | 1544 | } |