author     Keith Busch <keith.busch@intel.com>            2014-04-03 18:45:23 -0400
committer  Matthew Wilcox <matthew.r.wilcox@intel.com>    2014-04-10 17:11:59 -0400
commit     edd10d33283899fb15d99a290dcc9ceb3604ca78 (patch)
tree       870dc908379694e20ea319ba11c81bec5864cef7 /drivers/block
parent     4cc09e2dc4cbe6009c935b6f12a8376f09124bc5 (diff)
NVMe: Retry failed commands with non-fatal errors
For commands returned with failed status, queue these for resubmission and
continue retrying them until success or for a limited amount of time. The
final timeout was arbitrarily chosen so requests can't be retried indefinitely.

Since these are requeued on the nvmeq that submitted the command, the callbacks
have to take an nvmeq instead of an nvme_dev as a parameter so that we can use
the locked queue to append the iod to retry later.

The nvme_iod conveniently can be used to track how long we've been trying to
successfully complete an iod request. The nvme_iod also provides the nvme prp
dma mappings, so I had to move a few things around so we can keep those
mappings.

Signed-off-by: Keith Busch <keith.busch@intel.com>
[fixed checkpatch issue with long line]
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
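The retry decision this patch adds to bio_completion() comes down to three checks: the error must not carry the controller's "Do Not Retry" bit, the bio must not be marked fail-fast, and the iod must still be inside the IOD_TIMEOUT window measured from its start_time. The standalone C sketch below models just that test; the constant values and the nvme_should_requeue() helper are illustrative stand-ins, not the driver's code (the real logic is in the bio_completion() hunk further down).

/* retry_check.c - standalone illustration of the retry-eligibility test
 * added to bio_completion().  All constants are simplified stand-ins,
 * not the kernel's definitions.
 */
#include <stdbool.h>
#include <stdio.h>

#define NVME_SC_DNR        0x4000        /* "Do Not Retry" status bit (stand-in) */
#define REQ_FAILFAST_MASK  0x0380        /* bio fail-fast flags (stand-in) */
#define HZ                 100           /* ticks per second (stand-in) */
#define NVME_IO_TIMEOUT    (30 * HZ)     /* per-command I/O timeout */
#define IOD_TIMEOUT        (4 * NVME_IO_TIMEOUT) /* retry window per iod */

/* Requeue the iod only if the error is retryable, the bio is not fail-fast,
 * and we are still inside the IOD_TIMEOUT window since the iod was started. */
static bool nvme_should_requeue(unsigned int status, unsigned long bi_rw,
				unsigned long now, unsigned long start_time)
{
	if (!status)
		return false;	/* command succeeded, complete the bio */
	if (status & NVME_SC_DNR)
		return false;	/* controller says do not retry */
	if (bi_rw & REQ_FAILFAST_MASK)
		return false;	/* upper layer asked for fast failure */
	return (now - start_time) < IOD_TIMEOUT;
}

int main(void)
{
	unsigned long start = 1000;

	/* transient error shortly after submission: requeue for retry */
	printf("%d\n", nvme_should_requeue(0x0002, 0, start + HZ, start));
	/* same error but past the retry window: fail the bio with -EIO */
	printf("%d\n", nvme_should_requeue(0x0002, 0, start + IOD_TIMEOUT, start));
	return 0;
}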
Diffstat (limited to 'drivers/block')
-rw-r--r--   drivers/block/nvme-core.c   235
-rw-r--r--   drivers/block/nvme-scsi.c    10
2 files changed, 151 insertions(+), 94 deletions(-)
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 596e2abd7971..efa9c8f4a7a7 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -50,6 +50,7 @@
 #define SQ_SIZE(depth)	(depth * sizeof(struct nvme_command))
 #define CQ_SIZE(depth)	(depth * sizeof(struct nvme_completion))
 #define ADMIN_TIMEOUT	(60 * HZ)
+#define IOD_TIMEOUT	(4 * NVME_IO_TIMEOUT)
 
 unsigned char io_timeout = 30;
 module_param(io_timeout, byte, 0644);
@@ -94,6 +95,7 @@ struct nvme_queue {
 	wait_queue_head_t sq_full;
 	wait_queue_t sq_cong_wait;
 	struct bio_list sq_cong;
+	struct list_head iod_bio;
 	u32 __iomem *q_db;
 	u16 q_depth;
 	u16 cq_vector;
@@ -128,7 +130,7 @@ static inline void _nvme_check_size(void)
 	BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
 }
 
-typedef void (*nvme_completion_fn)(struct nvme_dev *, void *,
+typedef void (*nvme_completion_fn)(struct nvme_queue *, void *,
						struct nvme_completion *);
 
 struct nvme_cmd_info {
@@ -200,7 +202,7 @@ static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx,
 #define CMD_CTX_FLUSH	(0x318 + CMD_CTX_BASE)
 #define CMD_CTX_ABORT	(0x31C + CMD_CTX_BASE)
 
-static void special_completion(struct nvme_dev *dev, void *ctx,
+static void special_completion(struct nvme_queue *nvmeq, void *ctx,
						struct nvme_completion *cqe)
 {
 	if (ctx == CMD_CTX_CANCELLED)
@@ -208,26 +210,26 @@ static void special_completion(struct nvme_dev *dev, void *ctx,
 	if (ctx == CMD_CTX_FLUSH)
 		return;
 	if (ctx == CMD_CTX_ABORT) {
-		++dev->abort_limit;
+		++nvmeq->dev->abort_limit;
 		return;
 	}
 	if (ctx == CMD_CTX_COMPLETED) {
-		dev_warn(&dev->pci_dev->dev,
+		dev_warn(nvmeq->q_dmadev,
				"completed id %d twice on queue %d\n",
				cqe->command_id, le16_to_cpup(&cqe->sq_id));
 		return;
 	}
 	if (ctx == CMD_CTX_INVALID) {
-		dev_warn(&dev->pci_dev->dev,
+		dev_warn(nvmeq->q_dmadev,
				"invalid id %d completed on queue %d\n",
				cqe->command_id, le16_to_cpup(&cqe->sq_id));
 		return;
 	}
 
-	dev_warn(&dev->pci_dev->dev, "Unknown special completion %p\n", ctx);
+	dev_warn(nvmeq->q_dmadev, "Unknown special completion %p\n", ctx);
 }
 
-static void async_completion(struct nvme_dev *dev, void *ctx,
+static void async_completion(struct nvme_queue *nvmeq, void *ctx,
						struct nvme_completion *cqe)
 {
 	struct async_cmd_info *cmdinfo = ctx;
@@ -357,6 +359,7 @@ nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp)
 		iod->npages = -1;
 		iod->length = nbytes;
 		iod->nents = 0;
+		iod->first_dma = 0ULL;
 		iod->start_time = jiffies;
 	}
 
@@ -405,19 +408,31 @@ static void nvme_end_io_acct(struct bio *bio, unsigned long start_time)
 	part_stat_unlock();
 }
 
-static void bio_completion(struct nvme_dev *dev, void *ctx,
+static void bio_completion(struct nvme_queue *nvmeq, void *ctx,
						struct nvme_completion *cqe)
 {
 	struct nvme_iod *iod = ctx;
 	struct bio *bio = iod->private;
 	u16 status = le16_to_cpup(&cqe->status) >> 1;
 
+	if (unlikely(status)) {
+		if (!(status & NVME_SC_DNR ||
+				bio->bi_rw & REQ_FAILFAST_MASK) &&
+				(jiffies - iod->start_time) < IOD_TIMEOUT) {
+			if (!waitqueue_active(&nvmeq->sq_full))
+				add_wait_queue(&nvmeq->sq_full,
							&nvmeq->sq_cong_wait);
+			list_add_tail(&iod->node, &nvmeq->iod_bio);
+			wake_up(&nvmeq->sq_full);
+			return;
+		}
+	}
 	if (iod->nents) {
-		dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents,
+		dma_unmap_sg(nvmeq->q_dmadev, iod->sg, iod->nents,
			bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 		nvme_end_io_acct(bio, iod->start_time);
 	}
-	nvme_free_iod(dev, iod);
+	nvme_free_iod(nvmeq->dev, iod);
 	if (status)
 		bio_endio(bio, -EIO);
 	else
@@ -425,8 +440,8 @@ static void bio_completion(struct nvme_dev *dev, void *ctx,
 }
 
 /* length is in bytes. gfp flags indicates whether we may sleep. */
-int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd,
-			struct nvme_iod *iod, int total_len, gfp_t gfp)
+int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len,
+								gfp_t gfp)
 {
 	struct dma_pool *pool;
 	int length = total_len;
@@ -439,7 +454,6 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd,
 	dma_addr_t prp_dma;
 	int nprps, i;
 
-	cmd->prp1 = cpu_to_le64(dma_addr);
 	length -= (PAGE_SIZE - offset);
 	if (length <= 0)
 		return total_len;
@@ -454,7 +468,7 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd,
 	}
 
 	if (length <= PAGE_SIZE) {
-		cmd->prp2 = cpu_to_le64(dma_addr);
+		iod->first_dma = dma_addr;
 		return total_len;
 	}
 
@@ -469,13 +483,12 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd,
 
 	prp_list = dma_pool_alloc(pool, gfp, &prp_dma);
 	if (!prp_list) {
-		cmd->prp2 = cpu_to_le64(dma_addr);
+		iod->first_dma = dma_addr;
 		iod->npages = -1;
 		return (total_len - length) + PAGE_SIZE;
 	}
 	list[0] = prp_list;
 	iod->first_dma = prp_dma;
-	cmd->prp2 = cpu_to_le64(prp_dma);
 	i = 0;
 	for (;;) {
 		if (i == PAGE_SIZE / 8) {
@@ -514,10 +527,11 @@ static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq,
 
 	bio_chain(split, bio);
 
-	if (bio_list_empty(&nvmeq->sq_cong))
+	if (!waitqueue_active(&nvmeq->sq_full))
 		add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
 	bio_list_add(&nvmeq->sq_cong, split);
 	bio_list_add(&nvmeq->sq_cong, bio);
+	wake_up(&nvmeq->sq_full);
 
 	return 0;
 }
@@ -570,25 +584,13 @@ static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod,
 	return length;
 }
 
-/*
- * We reuse the small pool to allocate the 16-byte range here as it is not
- * worth having a special pool for these or additional cases to handle freeing
- * the iod.
- */
 static int nvme_submit_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns,
		struct bio *bio, struct nvme_iod *iod, int cmdid)
 {
-	struct nvme_dsm_range *range;
+	struct nvme_dsm_range *range =
+				(struct nvme_dsm_range *)iod_list(iod)[0];
 	struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
 
-	range = dma_pool_alloc(nvmeq->dev->prp_small_pool, GFP_ATOMIC,
-							&iod->first_dma);
-	if (!range)
-		return -ENOMEM;
-
-	iod_list(iod)[0] = (__le64 *)range;
-	iod->npages = 0;
-
 	range->cattr = cpu_to_le32(0);
 	range->nlb = cpu_to_le32(bio->bi_iter.bi_size >> ns->lba_shift);
 	range->slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_iter.bi_sector));
@@ -635,44 +637,22 @@ int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns)
 	return nvme_submit_flush(nvmeq, ns, cmdid);
 }
 
-/*
- * Called with local interrupts disabled and the q_lock held. May not sleep.
- */
-static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
-								struct bio *bio)
+static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod)
 {
+	struct bio *bio = iod->private;
+	struct nvme_ns *ns = bio->bi_bdev->bd_disk->private_data;
 	struct nvme_command *cmnd;
-	struct nvme_iod *iod;
-	enum dma_data_direction dma_dir;
-	int cmdid, length, result;
+	int cmdid;
 	u16 control;
 	u32 dsmgmt;
-	int psegs = bio_phys_segments(ns->queue, bio);
-
-	if ((bio->bi_rw & REQ_FLUSH) && psegs) {
-		result = nvme_submit_flush_data(nvmeq, ns);
-		if (result)
-			return result;
-	}
-
-	result = -ENOMEM;
-	iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC);
-	if (!iod)
-		goto nomem;
-	iod->private = bio;
 
-	result = -EBUSY;
 	cmdid = alloc_cmdid(nvmeq, iod, bio_completion, NVME_IO_TIMEOUT);
 	if (unlikely(cmdid < 0))
-		goto free_iod;
+		return cmdid;
 
-	if (bio->bi_rw & REQ_DISCARD) {
-		result = nvme_submit_discard(nvmeq, ns, bio, iod, cmdid);
-		if (result)
-			goto free_cmdid;
-		return result;
-	}
-	if ((bio->bi_rw & REQ_FLUSH) && !psegs)
+	if (bio->bi_rw & REQ_DISCARD)
+		return nvme_submit_discard(nvmeq, ns, bio, iod, cmdid);
+	if ((bio->bi_rw & REQ_FLUSH) && !iod->nents)
 		return nvme_submit_flush(nvmeq, ns, cmdid);
 
 	control = 0;
@@ -686,42 +666,85 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 		dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
 
 	cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
-
 	memset(cmnd, 0, sizeof(*cmnd));
-	if (bio_data_dir(bio)) {
-		cmnd->rw.opcode = nvme_cmd_write;
-		dma_dir = DMA_TO_DEVICE;
-	} else {
-		cmnd->rw.opcode = nvme_cmd_read;
-		dma_dir = DMA_FROM_DEVICE;
-	}
-
-	result = nvme_map_bio(nvmeq, iod, bio, dma_dir, psegs);
-	if (result <= 0)
-		goto free_cmdid;
-	length = result;
 
+	cmnd->rw.opcode = bio_data_dir(bio) ? nvme_cmd_write : nvme_cmd_read;
 	cmnd->rw.command_id = cmdid;
 	cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
-	length = nvme_setup_prps(nvmeq->dev, &cmnd->common, iod, length,
-								GFP_ATOMIC);
+	cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+	cmnd->rw.prp2 = cpu_to_le64(iod->first_dma);
 	cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_iter.bi_sector));
-	cmnd->rw.length = cpu_to_le16((length >> ns->lba_shift) - 1);
+	cmnd->rw.length =
+		cpu_to_le16((bio->bi_iter.bi_size >> ns->lba_shift) - 1);
 	cmnd->rw.control = cpu_to_le16(control);
 	cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
 
-	nvme_start_io_acct(bio);
 	if (++nvmeq->sq_tail == nvmeq->q_depth)
 		nvmeq->sq_tail = 0;
 	writel(nvmeq->sq_tail, nvmeq->q_db);
 
 	return 0;
+}
+
+/*
+ * Called with local interrupts disabled and the q_lock held. May not sleep.
+ */
+static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
+								struct bio *bio)
+{
+	struct nvme_iod *iod;
+	int psegs = bio_phys_segments(ns->queue, bio);
+	int result;
+
+	if ((bio->bi_rw & REQ_FLUSH) && psegs) {
+		result = nvme_submit_flush_data(nvmeq, ns);
+		if (result)
+			return result;
+	}
+
+	iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC);
+	if (!iod)
+		return -ENOMEM;
+
+	iod->private = bio;
+	if (bio->bi_rw & REQ_DISCARD) {
+		void *range;
+		/*
+		 * We reuse the small pool to allocate the 16-byte range here
+		 * as it is not worth having a special pool for these or
+		 * additional cases to handle freeing the iod.
+		 */
+		range = dma_pool_alloc(nvmeq->dev->prp_small_pool,
+						GFP_ATOMIC,
+						&iod->first_dma);
+		if (!range) {
+			result = -ENOMEM;
+			goto free_iod;
+		}
+		iod_list(iod)[0] = (__le64 *)range;
+		iod->npages = 0;
+	} else if (psegs) {
+		result = nvme_map_bio(nvmeq, iod, bio,
+			bio_data_dir(bio) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
+			psegs);
+		if (result <= 0)
+			goto free_iod;
+		if (nvme_setup_prps(nvmeq->dev, iod, result, GFP_ATOMIC) !=
+								result) {
+			result = -ENOMEM;
+			goto free_iod;
+		}
+		nvme_start_io_acct(bio);
+	}
+	if (unlikely(nvme_submit_iod(nvmeq, iod))) {
+		if (!waitqueue_active(&nvmeq->sq_full))
+			add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
+		list_add_tail(&iod->node, &nvmeq->iod_bio);
+	}
+	return 0;
 
- free_cmdid:
-	free_cmdid(nvmeq, cmdid, NULL);
  free_iod:
 	nvme_free_iod(nvmeq->dev, iod);
- nomem:
 	return result;
 }
 
@@ -745,7 +768,7 @@ static int nvme_process_cq(struct nvme_queue *nvmeq)
 		}
 
 		ctx = free_cmdid(nvmeq, cqe.command_id, &fn);
-		fn(nvmeq->dev, ctx, &cqe);
+		fn(nvmeq, ctx, &cqe);
 	}
 
 	/* If the controller ignores the cq head doorbell and continuously
@@ -781,7 +804,7 @@ static void nvme_make_request(struct request_queue *q, struct bio *bio)
 	if (!nvmeq->q_suspended && bio_list_empty(&nvmeq->sq_cong))
 		result = nvme_submit_bio_queue(nvmeq, ns, bio);
 	if (unlikely(result)) {
-		if (bio_list_empty(&nvmeq->sq_cong))
+		if (!waitqueue_active(&nvmeq->sq_full))
 			add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
 		bio_list_add(&nvmeq->sq_cong, bio);
 	}
@@ -825,7 +848,7 @@ struct sync_cmd_info {
 	int status;
 };
 
-static void sync_completion(struct nvme_dev *dev, void *ctx,
+static void sync_completion(struct nvme_queue *nvmeq, void *ctx,
						struct nvme_completion *cqe)
 {
 	struct sync_cmd_info *cmdinfo = ctx;
@@ -1112,7 +1135,7 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout)
 		dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d QID %d\n", cmdid,
								nvmeq->qid);
 		ctx = cancel_cmdid(nvmeq, cmdid, &fn);
-		fn(nvmeq->dev, ctx, &cqe);
+		fn(nvmeq, ctx, &cqe);
 	}
 }
 
@@ -1125,6 +1148,17 @@ static void nvme_free_queue(struct rcu_head *r)
 		struct bio *bio = bio_list_pop(&nvmeq->sq_cong);
 		bio_endio(bio, -EIO);
 	}
+	while (!list_empty(&nvmeq->iod_bio)) {
+		static struct nvme_completion cqe = {
+			.status = cpu_to_le16(
+				(NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1),
+		};
+		struct nvme_iod *iod = list_first_entry(&nvmeq->iod_bio,
+							struct nvme_iod,
+							node);
+		list_del(&iod->node);
+		bio_completion(nvmeq, iod, &cqe);
+	}
 	spin_unlock_irq(&nvmeq->q_lock);
 
 	dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
@@ -1232,6 +1266,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 	init_waitqueue_head(&nvmeq->sq_full);
 	init_waitqueue_entry(&nvmeq->sq_cong_wait, nvme_thread);
 	bio_list_init(&nvmeq->sq_cong);
+	INIT_LIST_HEAD(&nvmeq->iod_bio);
 	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
 	nvmeq->q_depth = depth;
 	nvmeq->cq_vector = vector;
@@ -1565,7 +1600,9 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 		c.rw.metadata = cpu_to_le64(meta_dma_addr);
 	}
 
-	length = nvme_setup_prps(dev, &c.common, iod, length, GFP_KERNEL);
+	length = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
+	c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+	c.rw.prp2 = cpu_to_le64(iod->first_dma);
 
 	if (length != (io.nblocks + 1) << ns->lba_shift)
 		status = -ENOMEM;
@@ -1635,8 +1672,9 @@ static int nvme_user_admin_cmd(struct nvme_dev *dev,
								length);
 		if (IS_ERR(iod))
 			return PTR_ERR(iod);
-		length = nvme_setup_prps(dev, &c.common, iod, length,
-								GFP_KERNEL);
+		length = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
+		c.common.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+		c.common.prp2 = cpu_to_le64(iod->first_dma);
 	}
 
 	timeout = cmd.timeout_ms ? msecs_to_jiffies(cmd.timeout_ms) :
@@ -1733,17 +1771,33 @@ static const struct block_device_operations nvme_fops = {
 	.getgeo		= nvme_getgeo,
 };
 
+static void nvme_resubmit_iods(struct nvme_queue *nvmeq)
+{
+	struct nvme_iod *iod, *next;
+
+	list_for_each_entry_safe(iod, next, &nvmeq->iod_bio, node) {
+		if (unlikely(nvme_submit_iod(nvmeq, iod)))
+			break;
+		list_del(&iod->node);
+		if (bio_list_empty(&nvmeq->sq_cong) &&
+				list_empty(&nvmeq->iod_bio))
+			remove_wait_queue(&nvmeq->sq_full,
+						&nvmeq->sq_cong_wait);
+	}
+}
+
 static void nvme_resubmit_bios(struct nvme_queue *nvmeq)
 {
 	while (bio_list_peek(&nvmeq->sq_cong)) {
 		struct bio *bio = bio_list_pop(&nvmeq->sq_cong);
 		struct nvme_ns *ns = bio->bi_bdev->bd_disk->private_data;
 
-		if (bio_list_empty(&nvmeq->sq_cong))
+		if (bio_list_empty(&nvmeq->sq_cong) &&
+				list_empty(&nvmeq->iod_bio))
 			remove_wait_queue(&nvmeq->sq_full,
							&nvmeq->sq_cong_wait);
 		if (nvme_submit_bio_queue(nvmeq, ns, bio)) {
-			if (bio_list_empty(&nvmeq->sq_cong))
+			if (!waitqueue_active(&nvmeq->sq_full))
				add_wait_queue(&nvmeq->sq_full,
							&nvmeq->sq_cong_wait);
 			bio_list_add_head(&nvmeq->sq_cong, bio);
@@ -1785,6 +1839,7 @@ static int nvme_kthread(void *data)
 			nvme_process_cq(nvmeq);
 			nvme_cancel_ios(nvmeq, true);
 			nvme_resubmit_bios(nvmeq);
+			nvme_resubmit_iods(nvmeq);
  unlock:
 			spin_unlock_irq(&nvmeq->q_lock);
 		}
diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 111c920c1574..2c3f5be06da1 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -1562,13 +1562,14 @@ static int nvme_trans_send_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 			res = PTR_ERR(iod);
 			goto out;
 		}
-		length = nvme_setup_prps(dev, &c.common, iod, tot_len,
-								GFP_KERNEL);
+		length = nvme_setup_prps(dev, iod, tot_len, GFP_KERNEL);
 		if (length != tot_len) {
 			res = -ENOMEM;
 			goto out_unmap;
 		}
 
+		c.dlfw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+		c.dlfw.prp2 = cpu_to_le64(iod->first_dma);
 		c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
 		c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
 	} else if (opcode == nvme_admin_activate_fw) {
@@ -2092,8 +2093,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 			res = PTR_ERR(iod);
 			goto out;
 		}
-		retcode = nvme_setup_prps(dev, &c.common, iod, unit_len,
-								GFP_KERNEL);
+		retcode = nvme_setup_prps(dev, iod, unit_len, GFP_KERNEL);
 		if (retcode != unit_len) {
 			nvme_unmap_user_pages(dev,
				(is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
@@ -2102,6 +2102,8 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 			res = -ENOMEM;
 			goto out;
 		}
+		c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+		c.rw.prp2 = cpu_to_le64(iod->first_dma);
 
 		nvme_offset += unit_num_blocks;
 