aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-07-27 15:51:00 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2018-07-27 15:51:00 -0400
commiteb181a814c98255b32d30b383baca00e6ebec72e (patch)
treef218ee22baa54bd70bd5a0f2d51d70736eb7bbce
parent864af0d40cdc82b705e3ca79cf2a57be900954b1 (diff)
parent5151842b9d8732d4cbfa8400b40bff894f501b2f (diff)
Merge tag 'for-linus-20180727' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe: "Bigger than usual at this time, mostly due to the O_DIRECT corruption issue and the fact that I was on vacation last week. This contains: - NVMe pull request with two fixes for the FC code, and two target fixes (Christoph) - a DIF bio reset iteration fix (Greg Edwards) - two nbd reply and requeue fixes (Josef) - SCSI timeout fixup (Keith) - a small series that fixes an issue with bio_iov_iter_get_pages(), which ended up causing corruption for larger sized O_DIRECT writes that ended up racing with buffered writes (Martin Wilck)" * tag 'for-linus-20180727' of git://git.kernel.dk/linux-block: block: reset bi_iter.bi_done after splitting bio block: bio_iov_iter_get_pages: pin more pages for multi-segment IOs blkdev: __blkdev_direct_IO_simple: fix leak in error case block: bio_iov_iter_get_pages: fix size of last iovec nvmet: only check for filebacking on -ENOTBLK nvmet: fixup crash on NULL device path scsi: set timed out out mq requests to complete blk-mq: export setting request completion state nvme: if_ready checks to fail io to deleting controller nvmet-fc: fix target sgl list on large transfers nbd: handle unexpected replies better nbd: don't requeue the same request twice.
-rw-r--r--block/bio.c54
-rw-r--r--block/blk-mq.c4
-rw-r--r--drivers/block/nbd.c96
-rw-r--r--drivers/nvme/host/fabrics.c10
-rw-r--r--drivers/nvme/host/fabrics.h3
-rw-r--r--drivers/nvme/host/fc.c2
-rw-r--r--drivers/nvme/host/rdma.c2
-rw-r--r--drivers/nvme/target/configfs.c9
-rw-r--r--drivers/nvme/target/core.c2
-rw-r--r--drivers/nvme/target/fc.c44
-rw-r--r--drivers/nvme/target/loop.c2
-rw-r--r--drivers/scsi/scsi_error.c14
-rw-r--r--fs/block_dev.c9
-rw-r--r--include/linux/blk-mq.h14
14 files changed, 209 insertions, 56 deletions
diff --git a/block/bio.c b/block/bio.c
index 67eff5eddc49..047c5dca6d90 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -903,25 +903,27 @@ int bio_add_page(struct bio *bio, struct page *page,
903EXPORT_SYMBOL(bio_add_page); 903EXPORT_SYMBOL(bio_add_page);
904 904
905/** 905/**
906 * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio 906 * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
907 * @bio: bio to add pages to 907 * @bio: bio to add pages to
908 * @iter: iov iterator describing the region to be mapped 908 * @iter: iov iterator describing the region to be mapped
909 * 909 *
910 * Pins as many pages from *iter and appends them to @bio's bvec array. The 910 * Pins pages from *iter and appends them to @bio's bvec array. The
911 * pages will have to be released using put_page() when done. 911 * pages will have to be released using put_page() when done.
 912 * For multi-segment *iter, this function only adds pages from
913 * the next non-empty segment of the iov iterator.
912 */ 914 */
913int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) 915static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
914{ 916{
915 unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt; 917 unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx;
916 struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; 918 struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
917 struct page **pages = (struct page **)bv; 919 struct page **pages = (struct page **)bv;
918 size_t offset, diff; 920 size_t offset;
919 ssize_t size; 921 ssize_t size;
920 922
921 size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset); 923 size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
922 if (unlikely(size <= 0)) 924 if (unlikely(size <= 0))
923 return size ? size : -EFAULT; 925 return size ? size : -EFAULT;
924 nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE; 926 idx = nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE;
925 927
926 /* 928 /*
927 * Deep magic below: We need to walk the pinned pages backwards 929 * Deep magic below: We need to walk the pinned pages backwards
@@ -934,21 +936,46 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
934 bio->bi_iter.bi_size += size; 936 bio->bi_iter.bi_size += size;
935 bio->bi_vcnt += nr_pages; 937 bio->bi_vcnt += nr_pages;
936 938
937 diff = (nr_pages * PAGE_SIZE - offset) - size; 939 while (idx--) {
938 while (nr_pages--) { 940 bv[idx].bv_page = pages[idx];
939 bv[nr_pages].bv_page = pages[nr_pages]; 941 bv[idx].bv_len = PAGE_SIZE;
940 bv[nr_pages].bv_len = PAGE_SIZE; 942 bv[idx].bv_offset = 0;
941 bv[nr_pages].bv_offset = 0;
942 } 943 }
943 944
944 bv[0].bv_offset += offset; 945 bv[0].bv_offset += offset;
945 bv[0].bv_len -= offset; 946 bv[0].bv_len -= offset;
946 if (diff) 947 bv[nr_pages - 1].bv_len -= nr_pages * PAGE_SIZE - offset - size;
947 bv[bio->bi_vcnt - 1].bv_len -= diff;
948 948
949 iov_iter_advance(iter, size); 949 iov_iter_advance(iter, size);
950 return 0; 950 return 0;
951} 951}
952
953/**
954 * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
955 * @bio: bio to add pages to
956 * @iter: iov iterator describing the region to be mapped
957 *
958 * Pins pages from *iter and appends them to @bio's bvec array. The
959 * pages will have to be released using put_page() when done.
960 * The function tries, but does not guarantee, to pin as many pages as
961 * fit into the bio, or are requested in *iter, whatever is smaller.
962 * If MM encounters an error pinning the requested pages, it stops.
963 * Error is returned only if 0 pages could be pinned.
964 */
965int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
966{
967 unsigned short orig_vcnt = bio->bi_vcnt;
968
969 do {
970 int ret = __bio_iov_iter_get_pages(bio, iter);
971
972 if (unlikely(ret))
973 return bio->bi_vcnt > orig_vcnt ? 0 : ret;
974
975 } while (iov_iter_count(iter) && !bio_full(bio));
976
977 return 0;
978}
952EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages); 979EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
953 980
954static void submit_bio_wait_endio(struct bio *bio) 981static void submit_bio_wait_endio(struct bio *bio)
@@ -1866,6 +1893,7 @@ struct bio *bio_split(struct bio *bio, int sectors,
1866 bio_integrity_trim(split); 1893 bio_integrity_trim(split);
1867 1894
1868 bio_advance(bio, split->bi_iter.bi_size); 1895 bio_advance(bio, split->bi_iter.bi_size);
1896 bio->bi_iter.bi_done = 0;
1869 1897
1870 if (bio_flagged(bio, BIO_TRACE_COMPLETION)) 1898 if (bio_flagged(bio, BIO_TRACE_COMPLETION))
1871 bio_set_flag(split, BIO_TRACE_COMPLETION); 1899 bio_set_flag(split, BIO_TRACE_COMPLETION);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 95919268564b..654b0dc7e001 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -558,10 +558,8 @@ static void __blk_mq_complete_request(struct request *rq)
558 bool shared = false; 558 bool shared = false;
559 int cpu; 559 int cpu;
560 560
561 if (cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) != 561 if (!blk_mq_mark_complete(rq))
562 MQ_RQ_IN_FLIGHT)
563 return; 562 return;
564
565 if (rq->internal_tag != -1) 563 if (rq->internal_tag != -1)
566 blk_mq_sched_completed_request(rq); 564 blk_mq_sched_completed_request(rq);
567 565
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 74a05561b620..3fb95c8d9fd8 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -112,12 +112,16 @@ struct nbd_device {
112 struct task_struct *task_setup; 112 struct task_struct *task_setup;
113}; 113};
114 114
115#define NBD_CMD_REQUEUED 1
116
115struct nbd_cmd { 117struct nbd_cmd {
116 struct nbd_device *nbd; 118 struct nbd_device *nbd;
119 struct mutex lock;
117 int index; 120 int index;
118 int cookie; 121 int cookie;
119 struct completion send_complete;
120 blk_status_t status; 122 blk_status_t status;
123 unsigned long flags;
124 u32 cmd_cookie;
121}; 125};
122 126
123#if IS_ENABLED(CONFIG_DEBUG_FS) 127#if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -146,6 +150,35 @@ static inline struct device *nbd_to_dev(struct nbd_device *nbd)
146 return disk_to_dev(nbd->disk); 150 return disk_to_dev(nbd->disk);
147} 151}
148 152
153static void nbd_requeue_cmd(struct nbd_cmd *cmd)
154{
155 struct request *req = blk_mq_rq_from_pdu(cmd);
156
157 if (!test_and_set_bit(NBD_CMD_REQUEUED, &cmd->flags))
158 blk_mq_requeue_request(req, true);
159}
160
161#define NBD_COOKIE_BITS 32
162
163static u64 nbd_cmd_handle(struct nbd_cmd *cmd)
164{
165 struct request *req = blk_mq_rq_from_pdu(cmd);
166 u32 tag = blk_mq_unique_tag(req);
167 u64 cookie = cmd->cmd_cookie;
168
169 return (cookie << NBD_COOKIE_BITS) | tag;
170}
171
172static u32 nbd_handle_to_tag(u64 handle)
173{
174 return (u32)handle;
175}
176
177static u32 nbd_handle_to_cookie(u64 handle)
178{
179 return (u32)(handle >> NBD_COOKIE_BITS);
180}
181
149static const char *nbdcmd_to_ascii(int cmd) 182static const char *nbdcmd_to_ascii(int cmd)
150{ 183{
151 switch (cmd) { 184 switch (cmd) {
@@ -319,6 +352,9 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
319 } 352 }
320 config = nbd->config; 353 config = nbd->config;
321 354
355 if (!mutex_trylock(&cmd->lock))
356 return BLK_EH_RESET_TIMER;
357
322 if (config->num_connections > 1) { 358 if (config->num_connections > 1) {
323 dev_err_ratelimited(nbd_to_dev(nbd), 359 dev_err_ratelimited(nbd_to_dev(nbd),
324 "Connection timed out, retrying (%d/%d alive)\n", 360 "Connection timed out, retrying (%d/%d alive)\n",
@@ -343,7 +379,8 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
343 nbd_mark_nsock_dead(nbd, nsock, 1); 379 nbd_mark_nsock_dead(nbd, nsock, 1);
344 mutex_unlock(&nsock->tx_lock); 380 mutex_unlock(&nsock->tx_lock);
345 } 381 }
346 blk_mq_requeue_request(req, true); 382 mutex_unlock(&cmd->lock);
383 nbd_requeue_cmd(cmd);
347 nbd_config_put(nbd); 384 nbd_config_put(nbd);
348 return BLK_EH_DONE; 385 return BLK_EH_DONE;
349 } 386 }
@@ -353,6 +390,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
353 } 390 }
354 set_bit(NBD_TIMEDOUT, &config->runtime_flags); 391 set_bit(NBD_TIMEDOUT, &config->runtime_flags);
355 cmd->status = BLK_STS_IOERR; 392 cmd->status = BLK_STS_IOERR;
393 mutex_unlock(&cmd->lock);
356 sock_shutdown(nbd); 394 sock_shutdown(nbd);
357 nbd_config_put(nbd); 395 nbd_config_put(nbd);
358done: 396done:
@@ -430,9 +468,9 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
430 struct iov_iter from; 468 struct iov_iter from;
431 unsigned long size = blk_rq_bytes(req); 469 unsigned long size = blk_rq_bytes(req);
432 struct bio *bio; 470 struct bio *bio;
471 u64 handle;
433 u32 type; 472 u32 type;
434 u32 nbd_cmd_flags = 0; 473 u32 nbd_cmd_flags = 0;
435 u32 tag = blk_mq_unique_tag(req);
436 int sent = nsock->sent, skip = 0; 474 int sent = nsock->sent, skip = 0;
437 475
438 iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request)); 476 iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
@@ -474,6 +512,8 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
474 goto send_pages; 512 goto send_pages;
475 } 513 }
476 iov_iter_advance(&from, sent); 514 iov_iter_advance(&from, sent);
515 } else {
516 cmd->cmd_cookie++;
477 } 517 }
478 cmd->index = index; 518 cmd->index = index;
479 cmd->cookie = nsock->cookie; 519 cmd->cookie = nsock->cookie;
@@ -482,7 +522,8 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
482 request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); 522 request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
483 request.len = htonl(size); 523 request.len = htonl(size);
484 } 524 }
485 memcpy(request.handle, &tag, sizeof(tag)); 525 handle = nbd_cmd_handle(cmd);
526 memcpy(request.handle, &handle, sizeof(handle));
486 527
487 dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n", 528 dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
488 req, nbdcmd_to_ascii(type), 529 req, nbdcmd_to_ascii(type),
@@ -500,6 +541,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
500 nsock->pending = req; 541 nsock->pending = req;
501 nsock->sent = sent; 542 nsock->sent = sent;
502 } 543 }
544 set_bit(NBD_CMD_REQUEUED, &cmd->flags);
503 return BLK_STS_RESOURCE; 545 return BLK_STS_RESOURCE;
504 } 546 }
505 dev_err_ratelimited(disk_to_dev(nbd->disk), 547 dev_err_ratelimited(disk_to_dev(nbd->disk),
@@ -541,6 +583,7 @@ send_pages:
541 */ 583 */
542 nsock->pending = req; 584 nsock->pending = req;
543 nsock->sent = sent; 585 nsock->sent = sent;
586 set_bit(NBD_CMD_REQUEUED, &cmd->flags);
544 return BLK_STS_RESOURCE; 587 return BLK_STS_RESOURCE;
545 } 588 }
546 dev_err(disk_to_dev(nbd->disk), 589 dev_err(disk_to_dev(nbd->disk),
@@ -573,10 +616,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
573 struct nbd_reply reply; 616 struct nbd_reply reply;
574 struct nbd_cmd *cmd; 617 struct nbd_cmd *cmd;
575 struct request *req = NULL; 618 struct request *req = NULL;
619 u64 handle;
576 u16 hwq; 620 u16 hwq;
577 u32 tag; 621 u32 tag;
578 struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)}; 622 struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)};
579 struct iov_iter to; 623 struct iov_iter to;
624 int ret = 0;
580 625
581 reply.magic = 0; 626 reply.magic = 0;
582 iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply)); 627 iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
@@ -594,8 +639,8 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
594 return ERR_PTR(-EPROTO); 639 return ERR_PTR(-EPROTO);
595 } 640 }
596 641
597 memcpy(&tag, reply.handle, sizeof(u32)); 642 memcpy(&handle, reply.handle, sizeof(handle));
598 643 tag = nbd_handle_to_tag(handle);
599 hwq = blk_mq_unique_tag_to_hwq(tag); 644 hwq = blk_mq_unique_tag_to_hwq(tag);
600 if (hwq < nbd->tag_set.nr_hw_queues) 645 if (hwq < nbd->tag_set.nr_hw_queues)
601 req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq], 646 req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
@@ -606,11 +651,25 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
606 return ERR_PTR(-ENOENT); 651 return ERR_PTR(-ENOENT);
607 } 652 }
608 cmd = blk_mq_rq_to_pdu(req); 653 cmd = blk_mq_rq_to_pdu(req);
654
655 mutex_lock(&cmd->lock);
656 if (cmd->cmd_cookie != nbd_handle_to_cookie(handle)) {
657 dev_err(disk_to_dev(nbd->disk), "Double reply on req %p, cmd_cookie %u, handle cookie %u\n",
658 req, cmd->cmd_cookie, nbd_handle_to_cookie(handle));
659 ret = -ENOENT;
660 goto out;
661 }
662 if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) {
663 dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n",
664 req);
665 ret = -ENOENT;
666 goto out;
667 }
609 if (ntohl(reply.error)) { 668 if (ntohl(reply.error)) {
610 dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", 669 dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
611 ntohl(reply.error)); 670 ntohl(reply.error));
612 cmd->status = BLK_STS_IOERR; 671 cmd->status = BLK_STS_IOERR;
613 return cmd; 672 goto out;
614 } 673 }
615 674
616 dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req); 675 dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req);
@@ -635,18 +694,18 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
635 if (nbd_disconnected(config) || 694 if (nbd_disconnected(config) ||
636 config->num_connections <= 1) { 695 config->num_connections <= 1) {
637 cmd->status = BLK_STS_IOERR; 696 cmd->status = BLK_STS_IOERR;
638 return cmd; 697 goto out;
639 } 698 }
640 return ERR_PTR(-EIO); 699 ret = -EIO;
700 goto out;
641 } 701 }
642 dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n", 702 dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
643 req, bvec.bv_len); 703 req, bvec.bv_len);
644 } 704 }
645 } else {
646 /* See the comment in nbd_queue_rq. */
647 wait_for_completion(&cmd->send_complete);
648 } 705 }
649 return cmd; 706out:
707 mutex_unlock(&cmd->lock);
708 return ret ? ERR_PTR(ret) : cmd;
650} 709}
651 710
652static void recv_work(struct work_struct *work) 711static void recv_work(struct work_struct *work)
@@ -805,7 +864,7 @@ again:
805 */ 864 */
806 blk_mq_start_request(req); 865 blk_mq_start_request(req);
807 if (unlikely(nsock->pending && nsock->pending != req)) { 866 if (unlikely(nsock->pending && nsock->pending != req)) {
808 blk_mq_requeue_request(req, true); 867 nbd_requeue_cmd(cmd);
809 ret = 0; 868 ret = 0;
810 goto out; 869 goto out;
811 } 870 }
@@ -818,7 +877,7 @@ again:
818 dev_err_ratelimited(disk_to_dev(nbd->disk), 877 dev_err_ratelimited(disk_to_dev(nbd->disk),
819 "Request send failed, requeueing\n"); 878 "Request send failed, requeueing\n");
820 nbd_mark_nsock_dead(nbd, nsock, 1); 879 nbd_mark_nsock_dead(nbd, nsock, 1);
821 blk_mq_requeue_request(req, true); 880 nbd_requeue_cmd(cmd);
822 ret = 0; 881 ret = 0;
823 } 882 }
824out: 883out:
@@ -842,7 +901,8 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
842 * that the server is misbehaving (or there was an error) before we're 901 * that the server is misbehaving (or there was an error) before we're
843 * done sending everything over the wire. 902 * done sending everything over the wire.
844 */ 903 */
845 init_completion(&cmd->send_complete); 904 mutex_lock(&cmd->lock);
905 clear_bit(NBD_CMD_REQUEUED, &cmd->flags);
846 906
847 /* We can be called directly from the user space process, which means we 907 /* We can be called directly from the user space process, which means we
848 * could possibly have signals pending so our sendmsg will fail. In 908 * could possibly have signals pending so our sendmsg will fail. In
@@ -854,7 +914,7 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
854 ret = BLK_STS_IOERR; 914 ret = BLK_STS_IOERR;
855 else if (!ret) 915 else if (!ret)
856 ret = BLK_STS_OK; 916 ret = BLK_STS_OK;
857 complete(&cmd->send_complete); 917 mutex_unlock(&cmd->lock);
858 918
859 return ret; 919 return ret;
860} 920}
@@ -1460,6 +1520,8 @@ static int nbd_init_request(struct blk_mq_tag_set *set, struct request *rq,
1460{ 1520{
1461 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq); 1521 struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq);
1462 cmd->nbd = set->driver_data; 1522 cmd->nbd = set->driver_data;
1523 cmd->flags = 0;
1524 mutex_init(&cmd->lock);
1463 return 0; 1525 return 0;
1464} 1526}
1465 1527
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 903eb4545e26..f7efe5a58cc7 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -539,14 +539,18 @@ static struct nvmf_transport_ops *nvmf_lookup_transport(
539/* 539/*
540 * For something we're not in a state to send to the device the default action 540 * For something we're not in a state to send to the device the default action
541 * is to busy it and retry it after the controller state is recovered. However, 541 * is to busy it and retry it after the controller state is recovered. However,
542 * anything marked for failfast or nvme multipath is immediately failed. 542 * if the controller is deleting or if anything is marked for failfast or
543 * nvme multipath it is immediately failed.
543 * 544 *
544 * Note: commands used to initialize the controller will be marked for failfast. 545 * Note: commands used to initialize the controller will be marked for failfast.
545 * Note: nvme cli/ioctl commands are marked for failfast. 546 * Note: nvme cli/ioctl commands are marked for failfast.
546 */ 547 */
547blk_status_t nvmf_fail_nonready_command(struct request *rq) 548blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
549 struct request *rq)
548{ 550{
549 if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) 551 if (ctrl->state != NVME_CTRL_DELETING &&
552 ctrl->state != NVME_CTRL_DEAD &&
553 !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
550 return BLK_STS_RESOURCE; 554 return BLK_STS_RESOURCE;
551 nvme_req(rq)->status = NVME_SC_ABORT_REQ; 555 nvme_req(rq)->status = NVME_SC_ABORT_REQ;
552 return BLK_STS_IOERR; 556 return BLK_STS_IOERR;
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index e1818a27aa2d..aa2fdb2a2e8f 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -162,7 +162,8 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
162void nvmf_free_options(struct nvmf_ctrl_options *opts); 162void nvmf_free_options(struct nvmf_ctrl_options *opts);
163int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); 163int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
164bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); 164bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
165blk_status_t nvmf_fail_nonready_command(struct request *rq); 165blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
166 struct request *rq);
166bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, 167bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
167 bool queue_live); 168 bool queue_live);
168 169
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 41d45a1b5c62..9bac912173ba 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2272,7 +2272,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
2272 2272
2273 if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE || 2273 if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
2274 !nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) 2274 !nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
2275 return nvmf_fail_nonready_command(rq); 2275 return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
2276 2276
2277 ret = nvme_setup_cmd(ns, rq, sqe); 2277 ret = nvme_setup_cmd(ns, rq, sqe);
2278 if (ret) 2278 if (ret)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 518c5b09038c..66ec5985c9f3 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1639,7 +1639,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
1639 WARN_ON_ONCE(rq->tag < 0); 1639 WARN_ON_ONCE(rq->tag < 0);
1640 1640
1641 if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) 1641 if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
1642 return nvmf_fail_nonready_command(rq); 1642 return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
1643 1643
1644 dev = queue->device->dev; 1644 dev = queue->device->dev;
1645 ib_dma_sync_single_for_cpu(dev, sqe->dma, 1645 ib_dma_sync_single_for_cpu(dev, sqe->dma,
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index d3f3b3ec4d1a..ebea1373d1b7 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -282,6 +282,7 @@ static ssize_t nvmet_ns_device_path_store(struct config_item *item,
282{ 282{
283 struct nvmet_ns *ns = to_nvmet_ns(item); 283 struct nvmet_ns *ns = to_nvmet_ns(item);
284 struct nvmet_subsys *subsys = ns->subsys; 284 struct nvmet_subsys *subsys = ns->subsys;
285 size_t len;
285 int ret; 286 int ret;
286 287
287 mutex_lock(&subsys->lock); 288 mutex_lock(&subsys->lock);
@@ -289,10 +290,14 @@ static ssize_t nvmet_ns_device_path_store(struct config_item *item,
289 if (ns->enabled) 290 if (ns->enabled)
290 goto out_unlock; 291 goto out_unlock;
291 292
292 kfree(ns->device_path); 293 ret = -EINVAL;
294 len = strcspn(page, "\n");
295 if (!len)
296 goto out_unlock;
293 297
298 kfree(ns->device_path);
294 ret = -ENOMEM; 299 ret = -ENOMEM;
295 ns->device_path = kstrndup(page, strcspn(page, "\n"), GFP_KERNEL); 300 ns->device_path = kstrndup(page, len, GFP_KERNEL);
296 if (!ns->device_path) 301 if (!ns->device_path)
297 goto out_unlock; 302 goto out_unlock;
298 303
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 74d4b785d2da..9838103f2d62 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -339,7 +339,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
339 goto out_unlock; 339 goto out_unlock;
340 340
341 ret = nvmet_bdev_ns_enable(ns); 341 ret = nvmet_bdev_ns_enable(ns);
342 if (ret) 342 if (ret == -ENOTBLK)
343 ret = nvmet_file_ns_enable(ns); 343 ret = nvmet_file_ns_enable(ns);
344 if (ret) 344 if (ret)
345 goto out_unlock; 345 goto out_unlock;
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 408279cb6f2c..29b4b236afd8 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -58,8 +58,8 @@ struct nvmet_fc_ls_iod {
58 struct work_struct work; 58 struct work_struct work;
59} __aligned(sizeof(unsigned long long)); 59} __aligned(sizeof(unsigned long long));
60 60
61/* desired maximum for a single sequence - if sg list allows it */
61#define NVMET_FC_MAX_SEQ_LENGTH (256 * 1024) 62#define NVMET_FC_MAX_SEQ_LENGTH (256 * 1024)
62#define NVMET_FC_MAX_XFR_SGENTS (NVMET_FC_MAX_SEQ_LENGTH / PAGE_SIZE)
63 63
64enum nvmet_fcp_datadir { 64enum nvmet_fcp_datadir {
65 NVMET_FCP_NODATA, 65 NVMET_FCP_NODATA,
@@ -74,6 +74,7 @@ struct nvmet_fc_fcp_iod {
74 struct nvme_fc_cmd_iu cmdiubuf; 74 struct nvme_fc_cmd_iu cmdiubuf;
75 struct nvme_fc_ersp_iu rspiubuf; 75 struct nvme_fc_ersp_iu rspiubuf;
76 dma_addr_t rspdma; 76 dma_addr_t rspdma;
77 struct scatterlist *next_sg;
77 struct scatterlist *data_sg; 78 struct scatterlist *data_sg;
78 int data_sg_cnt; 79 int data_sg_cnt;
79 u32 offset; 80 u32 offset;
@@ -1025,8 +1026,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo,
1025 INIT_LIST_HEAD(&newrec->assoc_list); 1026 INIT_LIST_HEAD(&newrec->assoc_list);
1026 kref_init(&newrec->ref); 1027 kref_init(&newrec->ref);
1027 ida_init(&newrec->assoc_cnt); 1028 ida_init(&newrec->assoc_cnt);
1028 newrec->max_sg_cnt = min_t(u32, NVMET_FC_MAX_XFR_SGENTS, 1029 newrec->max_sg_cnt = template->max_sgl_segments;
1029 template->max_sgl_segments);
1030 1030
1031 ret = nvmet_fc_alloc_ls_iodlist(newrec); 1031 ret = nvmet_fc_alloc_ls_iodlist(newrec);
1032 if (ret) { 1032 if (ret) {
@@ -1722,6 +1722,7 @@ nvmet_fc_alloc_tgt_pgs(struct nvmet_fc_fcp_iod *fod)
1722 ((fod->io_dir == NVMET_FCP_WRITE) ? 1722 ((fod->io_dir == NVMET_FCP_WRITE) ?
1723 DMA_FROM_DEVICE : DMA_TO_DEVICE)); 1723 DMA_FROM_DEVICE : DMA_TO_DEVICE));
1724 /* note: write from initiator perspective */ 1724 /* note: write from initiator perspective */
1725 fod->next_sg = fod->data_sg;
1725 1726
1726 return 0; 1727 return 0;
1727 1728
@@ -1866,24 +1867,49 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport,
1866 struct nvmet_fc_fcp_iod *fod, u8 op) 1867 struct nvmet_fc_fcp_iod *fod, u8 op)
1867{ 1868{
1868 struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; 1869 struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq;
1870 struct scatterlist *sg = fod->next_sg;
1869 unsigned long flags; 1871 unsigned long flags;
1870 u32 tlen; 1872 u32 remaininglen = fod->req.transfer_len - fod->offset;
1873 u32 tlen = 0;
1871 int ret; 1874 int ret;
1872 1875
1873 fcpreq->op = op; 1876 fcpreq->op = op;
1874 fcpreq->offset = fod->offset; 1877 fcpreq->offset = fod->offset;
1875 fcpreq->timeout = NVME_FC_TGTOP_TIMEOUT_SEC; 1878 fcpreq->timeout = NVME_FC_TGTOP_TIMEOUT_SEC;
1876 1879
1877 tlen = min_t(u32, tgtport->max_sg_cnt * PAGE_SIZE, 1880 /*
1878 (fod->req.transfer_len - fod->offset)); 1881 * for next sequence:
1882 * break at a sg element boundary
1883 * attempt to keep sequence length capped at
1884 * NVMET_FC_MAX_SEQ_LENGTH but allow sequence to
1885 * be longer if a single sg element is larger
1886 * than that amount. This is done to avoid creating
1887 * a new sg list to use for the tgtport api.
1888 */
1889 fcpreq->sg = sg;
1890 fcpreq->sg_cnt = 0;
1891 while (tlen < remaininglen &&
1892 fcpreq->sg_cnt < tgtport->max_sg_cnt &&
1893 tlen + sg_dma_len(sg) < NVMET_FC_MAX_SEQ_LENGTH) {
1894 fcpreq->sg_cnt++;
1895 tlen += sg_dma_len(sg);
1896 sg = sg_next(sg);
1897 }
1898 if (tlen < remaininglen && fcpreq->sg_cnt == 0) {
1899 fcpreq->sg_cnt++;
1900 tlen += min_t(u32, sg_dma_len(sg), remaininglen);
1901 sg = sg_next(sg);
1902 }
1903 if (tlen < remaininglen)
1904 fod->next_sg = sg;
1905 else
1906 fod->next_sg = NULL;
1907
1879 fcpreq->transfer_length = tlen; 1908 fcpreq->transfer_length = tlen;
1880 fcpreq->transferred_length = 0; 1909 fcpreq->transferred_length = 0;
1881 fcpreq->fcp_error = 0; 1910 fcpreq->fcp_error = 0;
1882 fcpreq->rsplen = 0; 1911 fcpreq->rsplen = 0;
1883 1912
1884 fcpreq->sg = &fod->data_sg[fod->offset / PAGE_SIZE];
1885 fcpreq->sg_cnt = DIV_ROUND_UP(tlen, PAGE_SIZE);
1886
1887 /* 1913 /*
1888 * If the last READDATA request: check if LLDD supports 1914 * If the last READDATA request: check if LLDD supports
1889 * combined xfr with response. 1915 * combined xfr with response.
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index d8d91f04bd7e..ae7586b8be07 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -162,7 +162,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
162 blk_status_t ret; 162 blk_status_t ret;
163 163
164 if (!nvmf_check_ready(&queue->ctrl->ctrl, req, queue_ready)) 164 if (!nvmf_check_ready(&queue->ctrl->ctrl, req, queue_ready))
165 return nvmf_fail_nonready_command(req); 165 return nvmf_fail_nonready_command(&queue->ctrl->ctrl, req);
166 166
167 ret = nvme_setup_cmd(ns, req, &iod->cmd); 167 ret = nvme_setup_cmd(ns, req, &iod->cmd);
168 if (ret) 168 if (ret)
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 8932ae81a15a..2715cdaa669c 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -296,6 +296,20 @@ enum blk_eh_timer_return scsi_times_out(struct request *req)
296 rtn = host->hostt->eh_timed_out(scmd); 296 rtn = host->hostt->eh_timed_out(scmd);
297 297
298 if (rtn == BLK_EH_DONE) { 298 if (rtn == BLK_EH_DONE) {
299 /*
300 * For blk-mq, we must set the request state to complete now
301 * before sending the request to the scsi error handler. This
302 * will prevent a use-after-free in the event the LLD manages
303 * to complete the request before the error handler finishes
304 * processing this timed out request.
305 *
306 * If the request was already completed, then the LLD beat the
307 * time out handler from transferring the request to the scsi
308 * error handler. In that case we can return immediately as no
309 * further action is required.
310 */
311 if (req->q->mq_ops && !blk_mq_mark_complete(req))
312 return rtn;
299 if (scsi_abort_command(scmd) != SUCCESS) { 313 if (scsi_abort_command(scmd) != SUCCESS) {
300 set_host_byte(scmd, DID_TIME_OUT); 314 set_host_byte(scmd, DID_TIME_OUT);
301 scsi_eh_scmd_add(scmd); 315 scsi_eh_scmd_add(scmd);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 0dd87aaeb39a..aba25414231a 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -221,7 +221,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
221 221
222 ret = bio_iov_iter_get_pages(&bio, iter); 222 ret = bio_iov_iter_get_pages(&bio, iter);
223 if (unlikely(ret)) 223 if (unlikely(ret))
224 return ret; 224 goto out;
225 ret = bio.bi_iter.bi_size; 225 ret = bio.bi_iter.bi_size;
226 226
227 if (iov_iter_rw(iter) == READ) { 227 if (iov_iter_rw(iter) == READ) {
@@ -250,12 +250,13 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
250 put_page(bvec->bv_page); 250 put_page(bvec->bv_page);
251 } 251 }
252 252
253 if (vecs != inline_vecs)
254 kfree(vecs);
255
256 if (unlikely(bio.bi_status)) 253 if (unlikely(bio.bi_status))
257 ret = blk_status_to_errno(bio.bi_status); 254 ret = blk_status_to_errno(bio.bi_status);
258 255
256out:
257 if (vecs != inline_vecs)
258 kfree(vecs);
259
259 bio_uninit(&bio); 260 bio_uninit(&bio);
260 261
261 return ret; 262 return ret;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index e3147eb74222..ca3f2c2edd85 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -287,6 +287,20 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
287 287
288void blk_mq_quiesce_queue_nowait(struct request_queue *q); 288void blk_mq_quiesce_queue_nowait(struct request_queue *q);
289 289
290/**
291 * blk_mq_mark_complete() - Set request state to complete
292 * @rq: request to set to complete state
293 *
294 * Returns true if request state was successfully set to complete. If
 295 * successful, the caller is responsible for seeing this request is ended, as
296 * blk_mq_complete_request will not work again.
297 */
298static inline bool blk_mq_mark_complete(struct request *rq)
299{
300 return cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) ==
301 MQ_RQ_IN_FLIGHT;
302}
303
290/* 304/*
291 * Driver command data is immediately after the request. So subtract request 305 * Driver command data is immediately after the request. So subtract request
292 * size to get back to the original request, add request size to get the PDU. 306 * size to get back to the original request, add request size to get the PDU.