summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBrian Foster <bfoster@redhat.com>2015-08-18 19:51:16 -0400
committerDave Chinner <david@fromorbit.com>2015-08-18 19:51:16 -0400
commit8d99fe92fed019e203f458370129fb28b3fb5740 (patch)
tree19adad6d8b66eef407b5180cd968f5f4b8cd8958
parentd43ac29be7a174f93a3d26cc1e68668fe86b782f (diff)
xfs: fix efi/efd error handling to avoid fs shutdown hangs
Freeing an extent in XFS involves logging an EFI (extent free intention), freeing the actual extent, and logging an EFD (extent free done). The EFI object is created with a reference count of 2: one for the current transaction and one for the subsequently created EFD. Under normal circumstances, the first reference is dropped when the EFI is unpinned and the second reference is dropped when the EFD is committed to the on-disk log. In event of errors or filesystem shutdown, there are various potential cleanup scenarios depending on the state of the EFI/EFD. The cleanup scenarios are confusing and racy, as demonstrated by the following test sequence: # mount $dev $mnt # fsstress -d $mnt -n 99999 -p 16 -z -f fallocate=1 \ -f punch=1 -f creat=1 -f unlink=1 & # sleep 5 # killall -9 fsstress; wait # godown -f $mnt # umount ... in which the final umount can hang due to the AIL being pinned indefinitely by one or more EFI items. This can occur due to several conditions. For example, if the shutdown occurs after the EFI is committed to the on-disk log and the EFD committed to the CIL, but before the EFD committed to the log, the EFD iop_committed() abort handler does not drop its reference to the EFI. Alternatively, manual error injection in the xfs_bmap_finish() codepath shows that if an error occurs after the EFI transaction is committed but before the EFD is constructed and logged, the EFI is never released from the AIL. Update the EFI/EFD item handling code to use a more straightforward and reliable approach to error handling. If an error occurs after the EFI transaction is committed and before the EFD is constructed, release the EFI explicitly from xfs_bmap_finish(). If the EFI transaction is cancelled, release the EFI in the unlock handler. Once the EFD is constructed, it is responsible for releasing the EFI under any circumstances (including whether the EFI item aborts due to log I/O error). Update the EFD item handlers to release the EFI if the transaction is cancelled or aborts due to log I/O error. Finally, update xfs_bmap_finish() to log at least one EFD extent to the transaction before xfs_free_extent() errors are handled to ensure the transaction is dirty and EFD item error handling is triggered. Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
-rw-r--r--fs/xfs/xfs_bmap_util.c84
-rw-r--r--fs/xfs/xfs_extfree_item.c69
-rw-r--r--fs/xfs/xfs_extfree_item.h25
3 files changed, 111 insertions, 67 deletions
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 0f34886cf726..fa65f67737c5 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -67,16 +67,15 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
67 */ 67 */
68int /* error */ 68int /* error */
69xfs_bmap_finish( 69xfs_bmap_finish(
70 xfs_trans_t **tp, /* transaction pointer addr */ 70 struct xfs_trans **tp, /* transaction pointer addr */
71 xfs_bmap_free_t *flist, /* i/o: list extents to free */ 71 struct xfs_bmap_free *flist, /* i/o: list extents to free */
72 int *committed) /* xact committed or not */ 72 int *committed)/* xact committed or not */
73{ 73{
74 xfs_efd_log_item_t *efd; /* extent free data */ 74 struct xfs_efd_log_item *efd; /* extent free data */
75 xfs_efi_log_item_t *efi; /* extent free intention */ 75 struct xfs_efi_log_item *efi; /* extent free intention */
76 int error; /* error return value */ 76 int error; /* error return value */
77 xfs_bmap_free_item_t *free; /* free extent item */ 77 struct xfs_bmap_free_item *free; /* free extent item */
78 xfs_mount_t *mp; /* filesystem mount structure */ 78 struct xfs_bmap_free_item *next; /* next item on free list */
79 xfs_bmap_free_item_t *next; /* next item on free list */
80 79
81 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); 80 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
82 if (flist->xbf_count == 0) { 81 if (flist->xbf_count == 0) {
@@ -88,40 +87,55 @@ xfs_bmap_finish(
88 xfs_trans_log_efi_extent(*tp, efi, free->xbfi_startblock, 87 xfs_trans_log_efi_extent(*tp, efi, free->xbfi_startblock,
89 free->xbfi_blockcount); 88 free->xbfi_blockcount);
90 89
91 error = xfs_trans_roll(tp, NULL); 90 error = __xfs_trans_roll(tp, NULL, committed);
92 *committed = 1; 91 if (error) {
93 /* 92 /*
94 * We have a new transaction, so we should return committed=1, 93 * If the transaction was committed, drop the EFD reference
95 * even though we're returning an error. 94 * since we're bailing out of here. The other reference is
96 */ 95 * dropped when the EFI hits the AIL.
97 if (error) 96 *
97 * If the transaction was not committed, the EFI is freed by the
98 * EFI item unlock handler on abort. Also, we have a new
99 * transaction so we should return committed=1 even though we're
100 * returning an error.
101 */
102 if (*committed) {
103 xfs_efi_release(efi);
104 xfs_force_shutdown((*tp)->t_mountp,
105 (error == -EFSCORRUPTED) ?
106 SHUTDOWN_CORRUPT_INCORE :
107 SHUTDOWN_META_IO_ERROR);
108 } else {
109 *committed = 1;
110 }
111
98 return error; 112 return error;
113 }
99 114
100 efd = xfs_trans_get_efd(*tp, efi, flist->xbf_count); 115 efd = xfs_trans_get_efd(*tp, efi, flist->xbf_count);
101 for (free = flist->xbf_first; free != NULL; free = next) { 116 for (free = flist->xbf_first; free != NULL; free = next) {
102 next = free->xbfi_next; 117 next = free->xbfi_next;
103 if ((error = xfs_free_extent(*tp, free->xbfi_startblock, 118
104 free->xbfi_blockcount))) { 119 /*
105 /* 120 * Free the extent and log the EFD to dirty the transaction
106 * The bmap free list will be cleaned up at a 121 * before handling errors. This ensures that the transaction is
107 * higher level. The EFI will be canceled when 122 * aborted, which:
108 * this transaction is aborted. 123 *
109 * Need to force shutdown here to make sure it 124 * 1.) releases the EFI and frees the EFD
110 * happens, since this transaction may not be 125 * 2.) shuts down the filesystem
111 * dirty yet. 126 *
112 */ 127 * The bmap free list is cleaned up at a higher level.
113 mp = (*tp)->t_mountp; 128 */
114 if (!XFS_FORCED_SHUTDOWN(mp)) 129 error = xfs_free_extent(*tp, free->xbfi_startblock,
115 xfs_force_shutdown(mp, 130 free->xbfi_blockcount);
116 (error == -EFSCORRUPTED) ?
117 SHUTDOWN_CORRUPT_INCORE :
118 SHUTDOWN_META_IO_ERROR);
119 return error;
120 }
121 xfs_trans_log_efd_extent(*tp, efd, free->xbfi_startblock, 131 xfs_trans_log_efd_extent(*tp, efd, free->xbfi_startblock,
122 free->xbfi_blockcount); 132 free->xbfi_blockcount);
133 if (error)
134 return error;
135
123 xfs_bmap_del_free(flist, NULL, free); 136 xfs_bmap_del_free(flist, NULL, free);
124 } 137 }
138
125 return 0; 139 return 0;
126} 140}
127 141
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 6ff738fe331a..475b9f00ae23 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -61,9 +61,15 @@ __xfs_efi_release(
61 61
62 if (atomic_dec_and_test(&efip->efi_refcount)) { 62 if (atomic_dec_and_test(&efip->efi_refcount)) {
63 spin_lock(&ailp->xa_lock); 63 spin_lock(&ailp->xa_lock);
64 /* xfs_trans_ail_delete() drops the AIL lock. */ 64 /*
65 xfs_trans_ail_delete(ailp, &efip->efi_item, 65 * We don't know whether the EFI made it to the AIL. Remove it
66 SHUTDOWN_LOG_IO_ERROR); 66 * if so. Note that xfs_trans_ail_delete() drops the AIL lock.
67 */
68 if (efip->efi_item.li_flags & XFS_LI_IN_AIL)
69 xfs_trans_ail_delete(ailp, &efip->efi_item,
70 SHUTDOWN_LOG_IO_ERROR);
71 else
72 spin_unlock(&ailp->xa_lock);
67 xfs_efi_item_free(efip); 73 xfs_efi_item_free(efip);
68 } 74 }
69} 75}
@@ -128,12 +134,12 @@ xfs_efi_item_pin(
128} 134}
129 135
130/* 136/*
131 * While EFIs cannot really be pinned, the unpin operation is the last place at 137 * The unpin operation is the last place an EFI is manipulated in the log. It is
132 * which the EFI is manipulated during a transaction. If we are being asked to 138 * either inserted in the AIL or aborted in the event of a log I/O error. In
133 * remove the EFI it's because the transaction has been cancelled and by 139 * either case, the EFI transaction has been successfully committed to make it
134 * definition that means the EFI cannot be in the AIL so remove it from the 140 * this far. Therefore, we expect whoever committed the EFI to either construct
135 * transaction and free it. Otherwise coordinate with xfs_efi_release() 141 * and commit the EFD or drop the EFD's reference in the event of error. Simply
136 * to determine who gets to free the EFI. 142 * drop the log's EFI reference now that the log is done with it.
137 */ 143 */
138STATIC void 144STATIC void
139xfs_efi_item_unpin( 145xfs_efi_item_unpin(
@@ -141,14 +147,6 @@ xfs_efi_item_unpin(
141 int remove) 147 int remove)
142{ 148{
143 struct xfs_efi_log_item *efip = EFI_ITEM(lip); 149 struct xfs_efi_log_item *efip = EFI_ITEM(lip);
144
145 if (remove) {
146 ASSERT(!(lip->li_flags & XFS_LI_IN_AIL));
147 if (lip->li_desc)
148 xfs_trans_del_item(lip);
149 xfs_efi_item_free(efip);
150 return;
151 }
152 xfs_efi_release(efip); 150 xfs_efi_release(efip);
153} 151}
154 152
@@ -167,6 +165,11 @@ xfs_efi_item_push(
167 return XFS_ITEM_PINNED; 165 return XFS_ITEM_PINNED;
168} 166}
169 167
168/*
169 * The EFI has been either committed or aborted if the transaction has been
170 * cancelled. If the transaction was cancelled, an EFD isn't going to be
171 * constructed and thus we free the EFI here directly.
172 */
170STATIC void 173STATIC void
171xfs_efi_item_unlock( 174xfs_efi_item_unlock(
172 struct xfs_log_item *lip) 175 struct xfs_log_item *lip)
@@ -412,20 +415,27 @@ xfs_efd_item_push(
412 return XFS_ITEM_PINNED; 415 return XFS_ITEM_PINNED;
413} 416}
414 417
418/*
419 * The EFD is either committed or aborted if the transaction is cancelled. If
420 * the transaction is cancelled, drop our reference to the EFI and free the EFD.
421 */
415STATIC void 422STATIC void
416xfs_efd_item_unlock( 423xfs_efd_item_unlock(
417 struct xfs_log_item *lip) 424 struct xfs_log_item *lip)
418{ 425{
419 if (lip->li_flags & XFS_LI_ABORTED) 426 struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
420 xfs_efd_item_free(EFD_ITEM(lip)); 427
428 if (lip->li_flags & XFS_LI_ABORTED) {
429 xfs_efi_release(efdp->efd_efip);
430 xfs_efd_item_free(efdp);
431 }
421} 432}
422 433
423/* 434/*
424 * When the efd item is committed to disk, all we need to do 435 * When the efd item is committed to disk, all we need to do is delete our
425 * is delete our reference to our partner efi item and then 436 * reference to our partner efi item and then free ourselves. Since we're
426 * free ourselves. Since we're freeing ourselves we must 437 * freeing ourselves we must return -1 to keep the transaction code from further
427 * return -1 to keep the transaction code from further referencing 438 * referencing this item.
428 * this item.
429 */ 439 */
430STATIC xfs_lsn_t 440STATIC xfs_lsn_t
431xfs_efd_item_committed( 441xfs_efd_item_committed(
@@ -435,13 +445,14 @@ xfs_efd_item_committed(
435 struct xfs_efd_log_item *efdp = EFD_ITEM(lip); 445 struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
436 446
437 /* 447 /*
438 * If we got a log I/O error, it's always the case that the LR with the 448 * Drop the EFI reference regardless of whether the EFD has been
439 * EFI got unpinned and freed before the EFD got aborted. 449 * aborted. Once the EFD transaction is constructed, it is the sole
450 * responsibility of the EFD to release the EFI (even if the EFI is
451 * aborted due to log I/O error).
440 */ 452 */
441 if (!(lip->li_flags & XFS_LI_ABORTED)) 453 xfs_efi_release(efdp->efd_efip);
442 xfs_efi_release(efdp->efd_efip);
443
444 xfs_efd_item_free(efdp); 454 xfs_efd_item_free(efdp);
455
445 return (xfs_lsn_t)-1; 456 return (xfs_lsn_t)-1;
446} 457}
447 458
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 399562eaf4f5..8fa8651705e1 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -39,9 +39,28 @@ struct kmem_zone;
39 * "extent free done" log item described below. 39 * "extent free done" log item described below.
40 * 40 *
41 * The EFI is reference counted so that it is not freed prior to both the EFI 41 * The EFI is reference counted so that it is not freed prior to both the EFI
42 * and EFD being committed and unpinned. This ensures that when the last 42 * and EFD being committed and unpinned. This ensures the EFI is inserted into
43 * reference goes away the EFI will always be in the AIL as it has been 43 * the AIL even in the event of out of order EFI/EFD processing. In other words,
44 * unpinned, regardless of whether the EFD is processed before or after the EFI. 44 * an EFI is born with two references:
45 *
46 * 1.) an EFI held reference to track EFI AIL insertion
47 * 2.) an EFD held reference to track EFD commit
48 *
49 * On allocation, both references are the responsibility of the caller. Once the
50 * EFI is added to and dirtied in a transaction, ownership of reference one
51 * transfers to the transaction. The reference is dropped once the EFI is
52 * inserted to the AIL or in the event of failure along the way (e.g., commit
53 * failure, log I/O error, etc.). Note that the caller remains responsible for
54 * the EFD reference under all circumstances to this point. The caller has no
55 * means to detect failure once the transaction is committed, however.
56 * Therefore, an EFD is required after this point, even in the event of
57 * unrelated failure.
58 *
59 * Once an EFD is allocated and dirtied in a transaction, reference two
60 * transfers to the transaction. The EFD reference is dropped once it reaches
61 * the unpin handler. Similar to the EFI, the reference also drops in the event
62 * of commit failure or log I/O errors. Note that the EFD is not inserted in the
63 * AIL, so at this point both the EFI and EFD are freed.
45 */ 64 */
46typedef struct xfs_efi_log_item { 65typedef struct xfs_efi_log_item {
47 xfs_log_item_t efi_item; 66 xfs_log_item_t efi_item;