aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
authorBrian Foster <bfoster@redhat.com>2019-04-12 10:39:20 -0400
committerDarrick J. Wong <darrick.wong@oracle.com>2019-04-14 21:15:56 -0400
commit545aa41f5cba935d595a95cda650ffbdf87575ee (patch)
treece3a51fcb59b3b1aecc99c923c41cbb6e4299731 /fs/xfs
parent4d09807f20462d6edf04f6e98d3d47bcdf7a5e2f (diff)
xfs: wake commit waiters on CIL abort before log item abort
XFS shutdown deadlocks have been reproduced by fstest generic/475. The deadlock signature involves log I/O completion running error handling to abort logged items and waiting for an inode cluster buffer lock in the buffer item unpin handler. The buffer lock is held by xfsaild attempting to flush an inode. The buffer happens to be pinned and so xfs_iflush() triggers an async log force to begin work required to get it unpinned. The log force is blocked waiting on the commit completion, which never occurs and thus leaves the filesystem deadlocked. The root problem is that aborted log I/O completion puts commit completion behind callback completion, which is unexpected for async log forces. Under normal running conditions, an async log force returns to the caller once the CIL ctx has been formatted/submitted and the commit completion event triggered at the tail end of xlog_cil_push(). If the filesystem has shut down, however, we rely on xlog_cil_committed() to trigger the completion event and it happens to do so after running log item unpin callbacks. This makes it unsafe to invoke an async log force from contexts that hold locks that might also be required in log completion processing. To address this problem, wake commit completion waiters before aborting log items in the log I/O completion handler. This ensures that an async log force will not deadlock on held locks if the filesystem happens to shut down. Note that it is still unsafe to issue a sync log force while holding such locks because a sync log force explicitly waits on the force completion, which occurs after log I/O completion processing. Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/xfs_log_cil.c21
1 files changed, 13 insertions, 8 deletions
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index d3884e08b43c..5e595948bc5a 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -582,6 +582,19 @@ xlog_cil_committed(
582 struct xfs_cil_ctx *ctx = args; 582 struct xfs_cil_ctx *ctx = args;
583 struct xfs_mount *mp = ctx->cil->xc_log->l_mp; 583 struct xfs_mount *mp = ctx->cil->xc_log->l_mp;
584 584
585 /*
586 * If the I/O failed, we're aborting the commit and already shutdown.
587 * Wake any commit waiters before aborting the log items so we don't
588 * block async log pushers on callbacks. Async log pushers explicitly do
589 * not wait on log force completion because they may be holding locks
590 * required to unpin items.
591 */
592 if (abort) {
593 spin_lock(&ctx->cil->xc_push_lock);
594 wake_up_all(&ctx->cil->xc_commit_wait);
595 spin_unlock(&ctx->cil->xc_push_lock);
596 }
597
585 xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, 598 xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
586 ctx->start_lsn, abort); 599 ctx->start_lsn, abort);
587 600
@@ -589,15 +602,7 @@ xlog_cil_committed(
589 xfs_extent_busy_clear(mp, &ctx->busy_extents, 602 xfs_extent_busy_clear(mp, &ctx->busy_extents,
590 (mp->m_flags & XFS_MOUNT_DISCARD) && !abort); 603 (mp->m_flags & XFS_MOUNT_DISCARD) && !abort);
591 604
592 /*
593 * If we are aborting the commit, wake up anyone waiting on the
594 * committing list. If we don't, then a shutdown we can leave processes
595 * waiting in xlog_cil_force_lsn() waiting on a sequence commit that
596 * will never happen because we aborted it.
597 */
598 spin_lock(&ctx->cil->xc_push_lock); 605 spin_lock(&ctx->cil->xc_push_lock);
599 if (abort)
600 wake_up_all(&ctx->cil->xc_commit_wait);
601 list_del(&ctx->committing); 606 list_del(&ctx->committing);
602 spin_unlock(&ctx->cil->xc_push_lock); 607 spin_unlock(&ctx->cil->xc_push_lock);
603 608