author     Keith Busch <keith.busch@intel.com>   2015-02-19 12:34:48 -0500
committer  Keith Busch <keith.busch@intel.com>   2015-02-19 18:15:37 -0500
commit     07836e659c81ec6b0d683dfbf7958339a22a7b69 (patch)
tree       6c043eb7b855825c6cb304f19cb0103bc4303cc2 /drivers/block
parent     2e1d8448196ba85cd78a18723413a3c92aabe0f3 (diff)
NVMe: Fix potential corruption during shutdown
The driver has to end unreturned commands at some point, even if the controller has not provided a completion. The driver tried to be safe by deleting IO queues prior to ending all unreturned commands. That should cause the controller to internally abort inflight commands, but the IO queue deletion request is not guaranteed to succeed, so all bets are off. We still have to make progress, so to be extra safe, this patch doesn't clear a queue to release the dma mapping for a command until after the pci device has been disabled.

This patch also removes the special handling during device initialization so controller recovery can be done all the time. This is possible since initialization is no longer inlined with pci probe.

Reported-by: Nilish Choudhury <nilesh.choudhury@oracle.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Diffstat (limited to 'drivers/block')
-rw-r--r--	drivers/block/nvme-core.c	49
1 file changed, 19 insertions(+), 30 deletions(-)
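The net effect on the shutdown path can be read straight out of the hunks below: nvme_clear_queue() is no longer called while queues are being suspended or deleted, and instead runs in one final pass only after nvme_dev_unmap() has disabled the pci device. A condensed sketch of the post-patch nvme_dev_shutdown() flow, keeping only lines visible in this diff and eliding the rest:

	static void nvme_dev_shutdown(struct nvme_dev *dev)
	{
		int i;

		nvme_dev_list_remove(dev);

		/* ... suspend the queues, or delete them and shut the controller
		 * down cleanly; see the hunks below, elided here ... */

		nvme_dev_unmap(dev);	/* pci device disabled: no further DMA */

		/*
		 * Only now are unreturned commands cancelled and their dma
		 * mappings released, so a faulty controller can no longer write
		 * into memory the driver is about to free.
		 */
		for (i = dev->queue_count - 1; i >= 0; i--)
			nvme_clear_queue(dev->queues[i]);
	}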
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index a57685f74e5e..0f388206b15b 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -1274,29 +1274,18 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
 	struct nvme_queue *nvmeq = cmd->nvmeq;
 
-	/*
-	 * The aborted req will be completed on receiving the abort req.
-	 * We enable the timer again. If hit twice, it'll cause a device reset,
-	 * as the device then is in a faulty state.
-	 */
-	int ret = BLK_EH_RESET_TIMER;
-
 	dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag,
 							nvmeq->qid);
-
 	spin_lock_irq(&nvmeq->q_lock);
-	if (!nvmeq->dev->initialized) {
-		/*
-		 * Force cancelled command frees the request, which requires we
-		 * return BLK_EH_NOT_HANDLED.
-		 */
-		nvme_cancel_queue_ios(nvmeq->hctx, req, nvmeq, reserved);
-		ret = BLK_EH_NOT_HANDLED;
-	} else
-		nvme_abort_req(req);
+	nvme_abort_req(req);
 	spin_unlock_irq(&nvmeq->q_lock);
 
-	return ret;
+	/*
+	 * The aborted req will be completed on receiving the abort req.
+	 * We enable the timer again. If hit twice, it'll cause a device reset,
+	 * as the device then is in a faulty state.
+	 */
+	return BLK_EH_RESET_TIMER;
 }
 
 static void nvme_free_queue(struct nvme_queue *nvmeq)
@@ -1349,7 +1338,6 @@ static void nvme_clear_queue(struct nvme_queue *nvmeq)
 	struct blk_mq_hw_ctx *hctx = nvmeq->hctx;
 
 	spin_lock_irq(&nvmeq->q_lock);
-	nvme_process_cq(nvmeq);
 	if (hctx && hctx->tags)
 		blk_mq_tag_busy_iter(hctx, nvme_cancel_queue_ios, nvmeq);
 	spin_unlock_irq(&nvmeq->q_lock);
@@ -1372,7 +1360,10 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid)
 	}
 	if (!qid && dev->admin_q)
 		blk_mq_freeze_queue_start(dev->admin_q);
-	nvme_clear_queue(nvmeq);
+
+	spin_lock_irq(&nvmeq->q_lock);
+	nvme_process_cq(nvmeq);
+	spin_unlock_irq(&nvmeq->q_lock);
 }
 
 static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
@@ -2121,8 +2112,7 @@ static int nvme_kthread(void *data)
 		spin_lock(&dev_list_lock);
 		list_for_each_entry_safe(dev, next, &dev_list, node) {
 			int i;
-			if (readl(&dev->bar->csts) & NVME_CSTS_CFS &&
-							dev->initialized) {
+			if (readl(&dev->bar->csts) & NVME_CSTS_CFS) {
 				if (work_busy(&dev->reset_work))
 					continue;
 				list_del_init(&dev->node);
@@ -2525,8 +2515,6 @@ static struct nvme_delq_ctx *nvme_get_dq(struct nvme_delq_ctx *dq)
 static void nvme_del_queue_end(struct nvme_queue *nvmeq)
 {
 	struct nvme_delq_ctx *dq = nvmeq->cmdinfo.ctx;
-
-	nvme_clear_queue(nvmeq);
 	nvme_put_dq(dq);
 }
 
@@ -2669,7 +2657,6 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
 	int i;
 	u32 csts = -1;
 
-	dev->initialized = 0;
 	nvme_dev_list_remove(dev);
 
 	if (dev->bar) {
@@ -2680,7 +2667,6 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
 		for (i = dev->queue_count - 1; i >= 0; i--) {
 			struct nvme_queue *nvmeq = dev->queues[i];
 			nvme_suspend_queue(nvmeq);
-			nvme_clear_queue(nvmeq);
 		}
 	} else {
 		nvme_disable_io_queues(dev);
@@ -2688,6 +2674,9 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
 		nvme_disable_queue(dev, 0);
 	}
 	nvme_dev_unmap(dev);
+
+	for (i = dev->queue_count - 1; i >= 0; i--)
+		nvme_clear_queue(dev->queues[i]);
 }
 
 static void nvme_dev_remove(struct nvme_dev *dev)
@@ -2955,7 +2944,6 @@ static int nvme_dev_resume(struct nvme_dev *dev)
 		nvme_unfreeze_queues(dev);
 		nvme_set_irq_hints(dev);
 	}
-	dev->initialized = 1;
 	return 0;
 }
 
@@ -3063,11 +3051,12 @@ static void nvme_async_probe(struct work_struct *work)
 		goto reset;
 
 	nvme_set_irq_hints(dev);
-	dev->initialized = 1;
 	return;
  reset:
-	dev->reset_workfn = nvme_reset_failed_dev;
-	queue_work(nvme_workq, &dev->reset_work);
+	if (!work_busy(&dev->reset_work)) {
+		dev->reset_workfn = nvme_reset_failed_dev;
+		queue_work(nvme_workq, &dev->reset_work);
+	}
 }
 
 static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)