aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: James Smart <jsmart2021@gmail.com> 2017-09-14 14:03:09 -0400
committer: Jens Axboe <axboe@kernel.dk> 2017-09-25 10:56:05 -0400
commit: cd48282cc736377d5abf7c04de8c6ba864ba3794 (patch)
tree: 9ec3b321ff9471b07d1a2119673259f67313c049
parent: d08774738446e77734777adcf5d1045237b4475a (diff)
nvme: stop aer posting if controller state not live
If an nvme async_event command completes, in most cases, a new async event is posted. However, if the controller enters a resetting or reconnecting state, there is nothing to block the scheduled work element from posting the async event again. Nor are there calls from the transport to stop async events when an association dies. In the case of FC, where the association is torn down, the aer must be aborted on the FC link and completes through the normal job completion path. Thus the terminated async event ends up being rescheduled even though the controller isn't in a valid state for the aer, and the reposting gets the transport into a partially torn down data structure. It's possible to hit the scenario on rdma, although it is much less likely: it requires an aer completing right as the association is terminated, and the association teardown reclaims the blk requests via nvme_cancel_request(), so it's immediate, not a link-related action like on FC. Fix by putting controller state checks in both the async event completion routine, where it schedules the async event, and in the async event work routine before it calls into the transport. It's effectively a "stop_async_events()" behavior. The transport, when it creates a new association with the subsystem, will transition the state back to live and is already restarting the async event posting. Signed-off-by: James Smart <james.smart@broadcom.com> [hch: remove taking a lock over reading the controller state] Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r-- drivers/nvme/host/core.c 5
1 files changed, 3 insertions, 2 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index acc816b67582..d470f031e27f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2590,7 +2590,7 @@ static void nvme_async_event_work(struct work_struct *work)
2590 container_of(work, struct nvme_ctrl, async_event_work); 2590 container_of(work, struct nvme_ctrl, async_event_work);
2591 2591
2592 spin_lock_irq(&ctrl->lock); 2592 spin_lock_irq(&ctrl->lock);
2593 while (ctrl->event_limit > 0) { 2593 while (ctrl->state == NVME_CTRL_LIVE && ctrl->event_limit > 0) {
2594 int aer_idx = --ctrl->event_limit; 2594 int aer_idx = --ctrl->event_limit;
2595 2595
2596 spin_unlock_irq(&ctrl->lock); 2596 spin_unlock_irq(&ctrl->lock);
@@ -2677,7 +2677,8 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
2677 /*FALLTHRU*/ 2677 /*FALLTHRU*/
2678 case NVME_SC_ABORT_REQ: 2678 case NVME_SC_ABORT_REQ:
2679 ++ctrl->event_limit; 2679 ++ctrl->event_limit;
2680 queue_work(nvme_wq, &ctrl->async_event_work); 2680 if (ctrl->state == NVME_CTRL_LIVE)
2681 schedule_work(&ctrl->async_event_work);
2681 break; 2682 break;
2682 default: 2683 default:
2683 break; 2684 break;