about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2019-03-29 17:43:07 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2019-03-29 17:43:07 -0400
commit ffb8e45cf33e14d9a565491aec7abe039bebcfce (patch)
tree 4181a995d8fb1de48ed6af9c4211ac6a27b54b0a
parent 7376e39ad96583545faefa2e7798bcb6a2a212a7 (diff)
parent 7bca889ee9297c3e208dee7c41aed7a56a880400 (diff)
Merge tag 'for-linus-20190329' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
 "Small set of fixes that should go into this series. This contains:

   - compat signal mask fix for io_uring (Arnd)

   - EAGAIN corner case for direct vs buffered writes for io_uring
     (Roman)

   - NVMe pull request from Christoph with various little fixes

   - sbitmap ws_active fix, which caused a perf regression for shared
     tags (me)

   - sbitmap bit ordering fix (Ming)

   - libata on-stack DMA fix (Raymond)"

* tag 'for-linus-20190329' of git://git.kernel.dk/linux-block:
  nvmet: fix error flow during ns enable
  nvmet: fix building bvec from sg list
  nvme-multipath: relax ANA state check
  nvme-tcp: fix an endianess miss-annotation
  libata: fix using DMA buffers on stack
  io_uring: offload write to async worker in case of -EAGAIN
  sbitmap: order READ/WRITE freed instance and setting clear bit
  blk-mq: fix sbitmap ws_active for shared tags
  io_uring: fix big-endian compat signal mask handling
  blk-mq: update comment for blk_mq_hctx_has_pending()
  blk-mq: use blk_mq_put_driver_tag() to put tag
-rw-r--r-- block/blk-flush.c 4
-rw-r--r-- block/blk-mq.c 16
-rw-r--r-- block/blk-mq.h 9
-rw-r--r-- drivers/ata/libata-zpodd.c 34
-rw-r--r-- drivers/nvme/host/multipath.c 5
-rw-r--r-- drivers/nvme/host/tcp.c 2
-rw-r--r-- drivers/nvme/target/core.c 4
-rw-r--r-- drivers/nvme/target/io-cmd-file.c 20
-rw-r--r-- fs/io_uring.c 26
-rw-r--r-- lib/sbitmap.c 11
10 files changed, 88 insertions, 43 deletions
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 6e0f2d97fc6d..d95f94892015 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -220,7 +220,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
220 blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq); 220 blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
221 flush_rq->tag = -1; 221 flush_rq->tag = -1;
222 } else { 222 } else {
223 blk_mq_put_driver_tag_hctx(hctx, flush_rq); 223 blk_mq_put_driver_tag(flush_rq);
224 flush_rq->internal_tag = -1; 224 flush_rq->internal_tag = -1;
225 } 225 }
226 226
@@ -324,7 +324,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
324 324
325 if (q->elevator) { 325 if (q->elevator) {
326 WARN_ON(rq->tag < 0); 326 WARN_ON(rq->tag < 0);
327 blk_mq_put_driver_tag_hctx(hctx, rq); 327 blk_mq_put_driver_tag(rq);
328 } 328 }
329 329
330 /* 330 /*
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 70b210a308c4..3ff3d7b49969 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -59,7 +59,8 @@ static int blk_mq_poll_stats_bkt(const struct request *rq)
59} 59}
60 60
61/* 61/*
62 * Check if any of the ctx's have pending work in this hardware queue 62 * Check if any of the ctx, dispatch list or elevator
63 * have pending work in this hardware queue.
63 */ 64 */
64static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx) 65static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
65{ 66{
@@ -1071,7 +1072,13 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
1071 hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait); 1072 hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);
1072 1073
1073 spin_lock(&hctx->dispatch_wait_lock); 1074 spin_lock(&hctx->dispatch_wait_lock);
1074 list_del_init(&wait->entry); 1075 if (!list_empty(&wait->entry)) {
1076 struct sbitmap_queue *sbq;
1077
1078 list_del_init(&wait->entry);
1079 sbq = &hctx->tags->bitmap_tags;
1080 atomic_dec(&sbq->ws_active);
1081 }
1075 spin_unlock(&hctx->dispatch_wait_lock); 1082 spin_unlock(&hctx->dispatch_wait_lock);
1076 1083
1077 blk_mq_run_hw_queue(hctx, true); 1084 blk_mq_run_hw_queue(hctx, true);
@@ -1087,6 +1094,7 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
1087static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, 1094static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
1088 struct request *rq) 1095 struct request *rq)
1089{ 1096{
1097 struct sbitmap_queue *sbq = &hctx->tags->bitmap_tags;
1090 struct wait_queue_head *wq; 1098 struct wait_queue_head *wq;
1091 wait_queue_entry_t *wait; 1099 wait_queue_entry_t *wait;
1092 bool ret; 1100 bool ret;
@@ -1109,7 +1117,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
1109 if (!list_empty_careful(&wait->entry)) 1117 if (!list_empty_careful(&wait->entry))
1110 return false; 1118 return false;
1111 1119
1112 wq = &bt_wait_ptr(&hctx->tags->bitmap_tags, hctx)->wait; 1120 wq = &bt_wait_ptr(sbq, hctx)->wait;
1113 1121
1114 spin_lock_irq(&wq->lock); 1122 spin_lock_irq(&wq->lock);
1115 spin_lock(&hctx->dispatch_wait_lock); 1123 spin_lock(&hctx->dispatch_wait_lock);
@@ -1119,6 +1127,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
1119 return false; 1127 return false;
1120 } 1128 }
1121 1129
1130 atomic_inc(&sbq->ws_active);
1122 wait->flags &= ~WQ_FLAG_EXCLUSIVE; 1131 wait->flags &= ~WQ_FLAG_EXCLUSIVE;
1123 __add_wait_queue(wq, wait); 1132 __add_wait_queue(wq, wait);
1124 1133
@@ -1139,6 +1148,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
1139 * someone else gets the wakeup. 1148 * someone else gets the wakeup.
1140 */ 1149 */
1141 list_del_init(&wait->entry); 1150 list_del_init(&wait->entry);
1151 atomic_dec(&sbq->ws_active);
1142 spin_unlock(&hctx->dispatch_wait_lock); 1152 spin_unlock(&hctx->dispatch_wait_lock);
1143 spin_unlock_irq(&wq->lock); 1153 spin_unlock_irq(&wq->lock);
1144 1154
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 0ed8e5a8729f..d704fc7766f4 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -224,15 +224,6 @@ static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
224 } 224 }
225} 225}
226 226
227static inline void blk_mq_put_driver_tag_hctx(struct blk_mq_hw_ctx *hctx,
228 struct request *rq)
229{
230 if (rq->tag == -1 || rq->internal_tag == -1)
231 return;
232
233 __blk_mq_put_driver_tag(hctx, rq);
234}
235
236static inline void blk_mq_put_driver_tag(struct request *rq) 227static inline void blk_mq_put_driver_tag(struct request *rq)
237{ 228{
238 if (rq->tag == -1 || rq->internal_tag == -1) 229 if (rq->tag == -1 || rq->internal_tag == -1)
diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c
index b3ed8f9953a8..173e6f2dd9af 100644
--- a/drivers/ata/libata-zpodd.c
+++ b/drivers/ata/libata-zpodd.c
@@ -52,38 +52,52 @@ static int eject_tray(struct ata_device *dev)
52/* Per the spec, only slot type and drawer type ODD can be supported */ 52/* Per the spec, only slot type and drawer type ODD can be supported */
53static enum odd_mech_type zpodd_get_mech_type(struct ata_device *dev) 53static enum odd_mech_type zpodd_get_mech_type(struct ata_device *dev)
54{ 54{
55 char buf[16]; 55 char *buf;
56 unsigned int ret; 56 unsigned int ret;
57 struct rm_feature_desc *desc = (void *)(buf + 8); 57 struct rm_feature_desc *desc;
58 struct ata_taskfile tf; 58 struct ata_taskfile tf;
59 static const char cdb[] = { GPCMD_GET_CONFIGURATION, 59 static const char cdb[] = { GPCMD_GET_CONFIGURATION,
60 2, /* only 1 feature descriptor requested */ 60 2, /* only 1 feature descriptor requested */
61 0, 3, /* 3, removable medium feature */ 61 0, 3, /* 3, removable medium feature */
62 0, 0, 0,/* reserved */ 62 0, 0, 0,/* reserved */
63 0, sizeof(buf), 63 0, 16,
64 0, 0, 0, 64 0, 0, 0,
65 }; 65 };
66 66
67 buf = kzalloc(16, GFP_KERNEL);
68 if (!buf)
69 return ODD_MECH_TYPE_UNSUPPORTED;
70 desc = (void *)(buf + 8);
71
67 ata_tf_init(dev, &tf); 72 ata_tf_init(dev, &tf);
68 tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; 73 tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
69 tf.command = ATA_CMD_PACKET; 74 tf.command = ATA_CMD_PACKET;
70 tf.protocol = ATAPI_PROT_PIO; 75 tf.protocol = ATAPI_PROT_PIO;
71 tf.lbam = sizeof(buf); 76 tf.lbam = 16;
72 77
73 ret = ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE, 78 ret = ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
74 buf, sizeof(buf), 0); 79 buf, 16, 0);
75 if (ret) 80 if (ret) {
81 kfree(buf);
76 return ODD_MECH_TYPE_UNSUPPORTED; 82 return ODD_MECH_TYPE_UNSUPPORTED;
83 }
77 84
78 if (be16_to_cpu(desc->feature_code) != 3) 85 if (be16_to_cpu(desc->feature_code) != 3) {
86 kfree(buf);
79 return ODD_MECH_TYPE_UNSUPPORTED; 87 return ODD_MECH_TYPE_UNSUPPORTED;
88 }
80 89
81 if (desc->mech_type == 0 && desc->load == 0 && desc->eject == 1) 90 if (desc->mech_type == 0 && desc->load == 0 && desc->eject == 1) {
91 kfree(buf);
82 return ODD_MECH_TYPE_SLOT; 92 return ODD_MECH_TYPE_SLOT;
83 else if (desc->mech_type == 1 && desc->load == 0 && desc->eject == 1) 93 } else if (desc->mech_type == 1 && desc->load == 0 &&
94 desc->eject == 1) {
95 kfree(buf);
84 return ODD_MECH_TYPE_DRAWER; 96 return ODD_MECH_TYPE_DRAWER;
85 else 97 } else {
98 kfree(buf);
86 return ODD_MECH_TYPE_UNSUPPORTED; 99 return ODD_MECH_TYPE_UNSUPPORTED;
100 }
87} 101}
88 102
89/* Test if ODD is zero power ready by sense code */ 103/* Test if ODD is zero power ready by sense code */
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 2839bb70badf..f0716f6ce41f 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -404,15 +404,12 @@ static inline bool nvme_state_is_live(enum nvme_ana_state state)
404static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc, 404static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
405 struct nvme_ns *ns) 405 struct nvme_ns *ns)
406{ 406{
407 enum nvme_ana_state old;
408
409 mutex_lock(&ns->head->lock); 407 mutex_lock(&ns->head->lock);
410 old = ns->ana_state;
411 ns->ana_grpid = le32_to_cpu(desc->grpid); 408 ns->ana_grpid = le32_to_cpu(desc->grpid);
412 ns->ana_state = desc->state; 409 ns->ana_state = desc->state;
413 clear_bit(NVME_NS_ANA_PENDING, &ns->flags); 410 clear_bit(NVME_NS_ANA_PENDING, &ns->flags);
414 411
415 if (nvme_state_is_live(ns->ana_state) && !nvme_state_is_live(old)) 412 if (nvme_state_is_live(ns->ana_state))
416 nvme_mpath_set_live(ns); 413 nvme_mpath_set_live(ns);
417 mutex_unlock(&ns->head->lock); 414 mutex_unlock(&ns->head->lock);
418} 415}
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index e7e08889865e..68c49dd67210 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -627,7 +627,7 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
627 return ret; 627 return ret;
628} 628}
629 629
630static inline void nvme_tcp_end_request(struct request *rq, __le16 status) 630static inline void nvme_tcp_end_request(struct request *rq, u16 status)
631{ 631{
632 union nvme_result res = {}; 632 union nvme_result res = {};
633 633
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 2d73b66e3686..b3e765a95af8 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -509,7 +509,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
509 509
510 ret = nvmet_p2pmem_ns_enable(ns); 510 ret = nvmet_p2pmem_ns_enable(ns);
511 if (ret) 511 if (ret)
512 goto out_unlock; 512 goto out_dev_disable;
513 513
514 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) 514 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
515 nvmet_p2pmem_ns_add_p2p(ctrl, ns); 515 nvmet_p2pmem_ns_add_p2p(ctrl, ns);
@@ -550,7 +550,7 @@ out_unlock:
550out_dev_put: 550out_dev_put:
551 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) 551 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
552 pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid)); 552 pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
553 553out_dev_disable:
554 nvmet_ns_dev_disable(ns); 554 nvmet_ns_dev_disable(ns);
555 goto out_unlock; 555 goto out_unlock;
556} 556}
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 3e43212d3c1c..bc6ebb51b0bf 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -75,11 +75,11 @@ err:
75 return ret; 75 return ret;
76} 76}
77 77
78static void nvmet_file_init_bvec(struct bio_vec *bv, struct sg_page_iter *iter) 78static void nvmet_file_init_bvec(struct bio_vec *bv, struct scatterlist *sg)
79{ 79{
80 bv->bv_page = sg_page_iter_page(iter); 80 bv->bv_page = sg_page(sg);
81 bv->bv_offset = iter->sg->offset; 81 bv->bv_offset = sg->offset;
82 bv->bv_len = PAGE_SIZE - iter->sg->offset; 82 bv->bv_len = sg->length;
83} 83}
84 84
85static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos, 85static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
@@ -128,14 +128,14 @@ static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2)
128 128
129static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags) 129static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags)
130{ 130{
131 ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE); 131 ssize_t nr_bvec = req->sg_cnt;
132 struct sg_page_iter sg_pg_iter;
133 unsigned long bv_cnt = 0; 132 unsigned long bv_cnt = 0;
134 bool is_sync = false; 133 bool is_sync = false;
135 size_t len = 0, total_len = 0; 134 size_t len = 0, total_len = 0;
136 ssize_t ret = 0; 135 ssize_t ret = 0;
137 loff_t pos; 136 loff_t pos;
138 137 int i;
138 struct scatterlist *sg;
139 139
140 if (req->f.mpool_alloc && nr_bvec > NVMET_MAX_MPOOL_BVEC) 140 if (req->f.mpool_alloc && nr_bvec > NVMET_MAX_MPOOL_BVEC)
141 is_sync = true; 141 is_sync = true;
@@ -147,8 +147,8 @@ static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags)
147 } 147 }
148 148
149 memset(&req->f.iocb, 0, sizeof(struct kiocb)); 149 memset(&req->f.iocb, 0, sizeof(struct kiocb));
150 for_each_sg_page(req->sg, &sg_pg_iter, req->sg_cnt, 0) { 150 for_each_sg(req->sg, sg, req->sg_cnt, i) {
151 nvmet_file_init_bvec(&req->f.bvec[bv_cnt], &sg_pg_iter); 151 nvmet_file_init_bvec(&req->f.bvec[bv_cnt], sg);
152 len += req->f.bvec[bv_cnt].bv_len; 152 len += req->f.bvec[bv_cnt].bv_len;
153 total_len += req->f.bvec[bv_cnt].bv_len; 153 total_len += req->f.bvec[bv_cnt].bv_len;
154 bv_cnt++; 154 bv_cnt++;
@@ -225,7 +225,7 @@ static void nvmet_file_submit_buffered_io(struct nvmet_req *req)
225 225
226static void nvmet_file_execute_rw(struct nvmet_req *req) 226static void nvmet_file_execute_rw(struct nvmet_req *req)
227{ 227{
228 ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE); 228 ssize_t nr_bvec = req->sg_cnt;
229 229
230 if (!req->sg_cnt || !nr_bvec) { 230 if (!req->sg_cnt || !nr_bvec) {
231 nvmet_req_complete(req, 0); 231 nvmet_req_complete(req, 0);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6aaa30580a2b..bbdbd56cf2ac 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1022,6 +1022,8 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
1022 1022
1023 ret = rw_verify_area(WRITE, file, &kiocb->ki_pos, iov_count); 1023 ret = rw_verify_area(WRITE, file, &kiocb->ki_pos, iov_count);
1024 if (!ret) { 1024 if (!ret) {
1025 ssize_t ret2;
1026
1025 /* 1027 /*
1026 * Open-code file_start_write here to grab freeze protection, 1028 * Open-code file_start_write here to grab freeze protection,
1027 * which will be released by another thread in 1029 * which will be released by another thread in
@@ -1036,7 +1038,19 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
1036 SB_FREEZE_WRITE); 1038 SB_FREEZE_WRITE);
1037 } 1039 }
1038 kiocb->ki_flags |= IOCB_WRITE; 1040 kiocb->ki_flags |= IOCB_WRITE;
1039 io_rw_done(kiocb, call_write_iter(file, kiocb, &iter)); 1041
1042 ret2 = call_write_iter(file, kiocb, &iter);
1043 if (!force_nonblock || ret2 != -EAGAIN) {
1044 io_rw_done(kiocb, ret2);
1045 } else {
1046 /*
1047 * If ->needs_lock is true, we're already in async
1048 * context.
1049 */
1050 if (!s->needs_lock)
1051 io_async_list_note(WRITE, req, iov_count);
1052 ret = -EAGAIN;
1053 }
1040 } 1054 }
1041out_free: 1055out_free:
1042 kfree(iovec); 1056 kfree(iovec);
@@ -1968,7 +1982,15 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
1968 return 0; 1982 return 0;
1969 1983
1970 if (sig) { 1984 if (sig) {
1971 ret = set_user_sigmask(sig, &ksigmask, &sigsaved, sigsz); 1985#ifdef CONFIG_COMPAT
1986 if (in_compat_syscall())
1987 ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
1988 &ksigmask, &sigsaved, sigsz);
1989 else
1990#endif
1991 ret = set_user_sigmask(sig, &ksigmask,
1992 &sigsaved, sigsz);
1993
1972 if (ret) 1994 if (ret)
1973 return ret; 1995 return ret;
1974 } 1996 }
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 5b382c1244ed..155fe38756ec 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -591,6 +591,17 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
591void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, 591void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
592 unsigned int cpu) 592 unsigned int cpu)
593{ 593{
594 /*
595 * Once the clear bit is set, the bit may be allocated out.
596 *
597 * Orders READ/WRITE on the associated instance (such as request
598 * of blk_mq) by this bit for avoiding race with re-allocation,
599 * and its pair is the memory barrier implied in __sbitmap_get_word.
600 *
601 * One invariant is that the clear bit has to be zero when the bit
602 * is in use.
603 */
604 smp_mb__before_atomic();
594 sbitmap_deferred_clear_bit(&sbq->sb, nr); 605 sbitmap_deferred_clear_bit(&sbq->sb, nr);
595 606
596 /* 607 /*