author	Linus Torvalds <torvalds@linux-foundation.org>	2019-04-13 19:23:16 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-04-13 19:23:16 -0400
commit	4443f8e6ac7755cd775c70d08be8042dc2f936cb (patch)
tree	7adb7c79800f27cd327cbafa792d9a847b804937
parent	b60bc0665e6af8c55b946b67ea8cb235823bb74e (diff)
parent	a89afe58f1a74aac768a5eb77af95ef4ee15beaa (diff)
Merge tag 'for-linus-20190412' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
 "Set of fixes that should go into this round. This pull is larger than
  I'd like at this time, but there's really no specific reason for that.
  Some are fixes for issues that went into this merge window, others are
  not. Anyway, this contains:

   - Hardware queue limiting for virtio-blk/scsi (Dongli)

   - Multi-page bvec fixes for lightnvm pblk

   - Multi-bio dio error fix (Jason)

   - Remove the cache hint from the io_uring tool side, since we didn't
     move forward with that (me)

   - Make io_uring SETUP_SQPOLL root restricted (me)

   - Fix leak of page in error handling for pc requests (Jérôme)

   - Fix BFQ regression introduced in this merge window (Paolo)

   - Fix break logic for bio segment iteration (Ming)

   - Fix NVMe cancel request error handling (Ming)

   - NVMe pull request with two fixes (Christoph):
      - fix the initial CSN for nvme-fc (James)
      - handle log page offsets properly in the target (Keith)"

* tag 'for-linus-20190412' of git://git.kernel.dk/linux-block:
  block: fix the return errno for direct IO
  nvmet: fix discover log page when offsets are used
  nvme-fc: correct csn initialization and increments on error
  block: do not leak memory in bio_copy_user_iov()
  lightnvm: pblk: fix crash in pblk_end_partial_read due to multipage bvecs
  nvme: cancel request synchronously
  blk-mq: introduce blk_mq_complete_request_sync()
  scsi: virtio_scsi: limit number of hw queues by nr_cpu_ids
  virtio-blk: limit number of hw queues by nr_cpu_ids
  block, bfq: fix use after free in bfq_bfqq_expire
  io_uring: restrict IORING_SETUP_SQPOLL to root
  tools/io_uring: remove IOCQE_FLAG_CACHEHIT
  block: don't use for-inside-for in bio_for_each_segment_all
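As context for the "don't use for-inside-for in bio_for_each_segment_all" change pulled here, a minimal usage sketch of the reworked single-loop iterator; the helper below is hypothetical and not part of this pull:

/*
 * Hypothetical endio-style helper: walks every single-page segment of a
 * bio with the reworked bio_for_each_segment_all(), which now takes a
 * struct bvec_iter_all and advances via bio_next_segment() instead of
 * the old nested for-inside-for macros.
 */
static void example_put_bio_pages(struct bio *bio)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;
	int i;

	bio_for_each_segment_all(bvec, bio, i, iter_all)
		put_page(bvec->bv_page);	/* drop the per-page reference */
}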
-rw-r--r--	block/bfq-iosched.c		| 15
-rw-r--r--	block/bfq-iosched.h		|  2
-rw-r--r--	block/bfq-wf2q.c		| 17
-rw-r--r--	block/bio.c			|  5
-rw-r--r--	block/blk-mq.c			|  7
-rw-r--r--	drivers/block/virtio_blk.c	|  2
-rw-r--r--	drivers/lightnvm/pblk-read.c	| 50
-rw-r--r--	drivers/nvme/host/core.c	|  2
-rw-r--r--	drivers/nvme/host/fc.c		| 20
-rw-r--r--	drivers/nvme/target/admin-cmd.c	|  5
-rw-r--r--	drivers/nvme/target/discovery.c	| 68
-rw-r--r--	drivers/nvme/target/nvmet.h	|  1
-rw-r--r--	drivers/scsi/virtio_scsi.c	|  1
-rw-r--r--	fs/block_dev.c			|  8
-rw-r--r--	fs/io_uring.c			|  4
-rw-r--r--	include/linux/bio.h		| 20
-rw-r--r--	include/linux/blk-mq.h		|  1
-rw-r--r--	include/linux/bvec.h		| 14
-rw-r--r--	include/linux/nvme.h		|  9
-rw-r--r--	tools/io_uring/io_uring-bench.c	| 32
20 files changed, 174 insertions(+), 109 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index fac188dd78fa..dfb8cb0af13a 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2822,7 +2822,7 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq)
 	bfq_remove_request(q, rq);
 }
 
-static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+static bool __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 {
 	/*
 	 * If this bfqq is shared between multiple processes, check
@@ -2855,9 +2855,11 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
 	/*
 	 * All in-service entities must have been properly deactivated
 	 * or requeued before executing the next function, which
-	 * resets all in-service entites as no more in service.
+	 * resets all in-service entities as no more in service. This
+	 * may cause bfqq to be freed. If this happens, the next
+	 * function returns true.
 	 */
-	__bfq_bfqd_reset_in_service(bfqd);
+	return __bfq_bfqd_reset_in_service(bfqd);
 }
 
 /**
@@ -3262,7 +3264,6 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
 	bool slow;
 	unsigned long delta = 0;
 	struct bfq_entity *entity = &bfqq->entity;
-	int ref;
 
 	/*
 	 * Check whether the process is slow (see bfq_bfqq_is_slow).
@@ -3347,10 +3348,8 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
 	 * reason.
 	 */
 	__bfq_bfqq_recalc_budget(bfqd, bfqq, reason);
-	ref = bfqq->ref;
-	__bfq_bfqq_expire(bfqd, bfqq);
-
-	if (ref == 1) /* bfqq is gone, no more actions on it */
+	if (__bfq_bfqq_expire(bfqd, bfqq))
+		/* bfqq is gone, no more actions on it */
 		return;
 
 	bfqq->injected_service = 0;
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 062e1c4787f4..86394e503ca9 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -995,7 +995,7 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity,
 		     bool ins_into_idle_tree);
 bool next_queue_may_preempt(struct bfq_data *bfqd);
 struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd);
-void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd);
+bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd);
 void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
 			 bool ins_into_idle_tree, bool expiration);
 void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index a11bef75483d..ae4d000ac0af 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -1605,7 +1605,8 @@ struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
 	return bfqq;
 }
 
-void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
+/* returns true if the in-service queue gets freed */
+bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
 {
 	struct bfq_queue *in_serv_bfqq = bfqd->in_service_queue;
 	struct bfq_entity *in_serv_entity = &in_serv_bfqq->entity;
@@ -1629,8 +1630,20 @@ void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
 	 * service tree either, then release the service reference to
 	 * the queue it represents (taken with bfq_get_entity).
 	 */
-	if (!in_serv_entity->on_st)
+	if (!in_serv_entity->on_st) {
+		/*
+		 * If no process is referencing in_serv_bfqq any
+		 * longer, then the service reference may be the only
+		 * reference to the queue. If this is the case, then
+		 * bfqq gets freed here.
+		 */
+		int ref = in_serv_bfqq->ref;
 		bfq_put_queue(in_serv_bfqq);
+		if (ref == 1)
+			return true;
+	}
+
+	return false;
 }
 
 void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
diff --git a/block/bio.c b/block/bio.c
index b64cedc7f87c..716510ecd7ff 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1298,8 +1298,11 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
 			}
 		}
 
-		if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes)
+		if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
+			if (!map_data)
+				__free_page(page);
 			break;
+		}
 
 		len -= bytes;
 		offset = 0;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a9354835cf51..9516304a38ee 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -654,6 +654,13 @@ bool blk_mq_complete_request(struct request *rq)
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
+void blk_mq_complete_request_sync(struct request *rq)
+{
+	WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
+	rq->q->mq_ops->complete(rq);
+}
+EXPORT_SYMBOL_GPL(blk_mq_complete_request_sync);
+
 int blk_mq_request_started(struct request *rq)
 {
 	return blk_mq_rq_state(rq) != MQ_RQ_IDLE;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 4bc083b7c9b5..2a7ca4a1e6f7 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -513,6 +513,8 @@ static int init_vq(struct virtio_blk *vblk)
 	if (err)
 		num_vqs = 1;
 
+	num_vqs = min_t(unsigned int, nr_cpu_ids, num_vqs);
+
 	vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL);
 	if (!vblk->vqs)
 		return -ENOMEM;
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 3789185144da..0b7d5fb4548d 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -231,14 +231,14 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
 	struct pblk_sec_meta *meta;
 	struct bio *new_bio = rqd->bio;
 	struct bio *bio = pr_ctx->orig_bio;
-	struct bio_vec src_bv, dst_bv;
 	void *meta_list = rqd->meta_list;
-	int bio_init_idx = pr_ctx->bio_init_idx;
 	unsigned long *read_bitmap = pr_ctx->bitmap;
+	struct bvec_iter orig_iter = BVEC_ITER_ALL_INIT;
+	struct bvec_iter new_iter = BVEC_ITER_ALL_INIT;
 	int nr_secs = pr_ctx->orig_nr_secs;
 	int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
 	void *src_p, *dst_p;
-	int hole, i;
+	int bit, i;
 
 	if (unlikely(nr_holes == 1)) {
 		struct ppa_addr ppa;
@@ -257,33 +257,39 @@ static void pblk_end_partial_read(struct nvm_rq *rqd)
 
 	/* Fill the holes in the original bio */
 	i = 0;
-	hole = find_first_zero_bit(read_bitmap, nr_secs);
-	do {
-		struct pblk_line *line;
+	for (bit = 0; bit < nr_secs; bit++) {
+		if (!test_bit(bit, read_bitmap)) {
+			struct bio_vec dst_bv, src_bv;
+			struct pblk_line *line;
 
-		line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]);
-		kref_put(&line->ref, pblk_line_put);
+			line = pblk_ppa_to_line(pblk, rqd->ppa_list[i]);
+			kref_put(&line->ref, pblk_line_put);
 
-		meta = pblk_get_meta(pblk, meta_list, hole);
-		meta->lba = cpu_to_le64(pr_ctx->lba_list_media[i]);
+			meta = pblk_get_meta(pblk, meta_list, bit);
+			meta->lba = cpu_to_le64(pr_ctx->lba_list_media[i]);
 
-		src_bv = new_bio->bi_io_vec[i++];
-		dst_bv = bio->bi_io_vec[bio_init_idx + hole];
+			dst_bv = bio_iter_iovec(bio, orig_iter);
+			src_bv = bio_iter_iovec(new_bio, new_iter);
 
-		src_p = kmap_atomic(src_bv.bv_page);
-		dst_p = kmap_atomic(dst_bv.bv_page);
+			src_p = kmap_atomic(src_bv.bv_page);
+			dst_p = kmap_atomic(dst_bv.bv_page);
 
-		memcpy(dst_p + dst_bv.bv_offset,
-			src_p + src_bv.bv_offset,
-			PBLK_EXPOSED_PAGE_SIZE);
+			memcpy(dst_p + dst_bv.bv_offset,
+				src_p + src_bv.bv_offset,
+				PBLK_EXPOSED_PAGE_SIZE);
 
-		kunmap_atomic(src_p);
-		kunmap_atomic(dst_p);
+			kunmap_atomic(src_p);
+			kunmap_atomic(dst_p);
 
-		mempool_free(src_bv.bv_page, &pblk->page_bio_pool);
+			flush_dcache_page(dst_bv.bv_page);
+			mempool_free(src_bv.bv_page, &pblk->page_bio_pool);
 
-		hole = find_next_zero_bit(read_bitmap, nr_secs, hole + 1);
-	} while (hole < nr_secs);
+			bio_advance_iter(new_bio, &new_iter,
+					PBLK_EXPOSED_PAGE_SIZE);
+			i++;
+		}
+		bio_advance_iter(bio, &orig_iter, PBLK_EXPOSED_PAGE_SIZE);
+	}
 
 	bio_put(new_bio);
 	kfree(pr_ctx);
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 470601980794..2c43e12b70af 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -288,7 +288,7 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved)
288 "Cancelling I/O %d", req->tag); 288 "Cancelling I/O %d", req->tag);
289 289
290 nvme_req(req)->status = NVME_SC_ABORT_REQ; 290 nvme_req(req)->status = NVME_SC_ABORT_REQ;
291 blk_mq_complete_request(req); 291 blk_mq_complete_request_sync(req);
292 return true; 292 return true;
293} 293}
294EXPORT_SYMBOL_GPL(nvme_cancel_request); 294EXPORT_SYMBOL_GPL(nvme_cancel_request);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index f3b9d91ba0df..6d8451356eac 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1845,7 +1845,7 @@ nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx)
 	memset(queue, 0, sizeof(*queue));
 	queue->ctrl = ctrl;
 	queue->qnum = idx;
-	atomic_set(&queue->csn, 1);
+	atomic_set(&queue->csn, 0);
 	queue->dev = ctrl->dev;
 
 	if (idx > 0)
@@ -1887,7 +1887,7 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue)
 	 */
 
 	queue->connection_id = 0;
-	atomic_set(&queue->csn, 1);
+	atomic_set(&queue->csn, 0);
 }
 
 static void
@@ -2183,7 +2183,6 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 {
 	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
 	struct nvme_command *sqe = &cmdiu->sqe;
-	u32 csn;
 	int ret, opstate;
 
 	/*
@@ -2198,8 +2197,6 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 
 	/* format the FC-NVME CMD IU and fcp_req */
 	cmdiu->connection_id = cpu_to_be64(queue->connection_id);
-	csn = atomic_inc_return(&queue->csn);
-	cmdiu->csn = cpu_to_be32(csn);
 	cmdiu->data_len = cpu_to_be32(data_len);
 	switch (io_dir) {
 	case NVMEFC_FCP_WRITE:
@@ -2257,11 +2254,24 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 	if (!(op->flags & FCOP_FLAGS_AEN))
 		blk_mq_start_request(op->rq);
 
+	cmdiu->csn = cpu_to_be32(atomic_inc_return(&queue->csn));
 	ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport,
 					&ctrl->rport->remoteport,
 					queue->lldd_handle, &op->fcp_req);
 
 	if (ret) {
+		/*
+		 * If the lld fails to send the command is there an issue with
+		 * the csn value? If the command that fails is the Connect,
+		 * no - as the connection won't be live. If it is a command
+		 * post-connect, it's possible a gap in csn may be created.
+		 * Does this matter? As Linux initiators don't send fused
+		 * commands, no. The gap would exist, but as there's nothing
+		 * that depends on csn order to be delivered on the target
+		 * side, it shouldn't hurt. It would be difficult for a
+		 * target to even detect the csn gap as it has no idea when the
+		 * cmd with the csn was supposed to arrive.
+		 */
 		opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
 		__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
 
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 76250181fee0..9f72d515fc4b 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -24,6 +24,11 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd)
 	return len;
 }
 
+u64 nvmet_get_log_page_offset(struct nvme_command *cmd)
+{
+	return le64_to_cpu(cmd->get_log_page.lpo);
+}
+
 static void nvmet_execute_get_log_page_noop(struct nvmet_req *req)
 {
 	nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->data_len));
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index c872b47a88f3..33ed95e72d6b 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -131,54 +131,76 @@ static void nvmet_set_disc_traddr(struct nvmet_req *req, struct nvmet_port *port
 	memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
 }
 
+static size_t discovery_log_entries(struct nvmet_req *req)
+{
+	struct nvmet_ctrl *ctrl = req->sq->ctrl;
+	struct nvmet_subsys_link *p;
+	struct nvmet_port *r;
+	size_t entries = 0;
+
+	list_for_each_entry(p, &req->port->subsystems, entry) {
+		if (!nvmet_host_allowed(p->subsys, ctrl->hostnqn))
+			continue;
+		entries++;
+	}
+	list_for_each_entry(r, &req->port->referrals, entry)
+		entries++;
+	return entries;
+}
+
 static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
 {
 	const int entry_size = sizeof(struct nvmf_disc_rsp_page_entry);
 	struct nvmet_ctrl *ctrl = req->sq->ctrl;
 	struct nvmf_disc_rsp_page_hdr *hdr;
+	u64 offset = nvmet_get_log_page_offset(req->cmd);
 	size_t data_len = nvmet_get_log_page_len(req->cmd);
-	size_t alloc_len = max(data_len, sizeof(*hdr));
-	int residual_len = data_len - sizeof(*hdr);
+	size_t alloc_len;
 	struct nvmet_subsys_link *p;
 	struct nvmet_port *r;
 	u32 numrec = 0;
 	u16 status = 0;
+	void *buffer;
+
+	/* Spec requires dword aligned offsets */
+	if (offset & 0x3) {
+		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+		goto out;
+	}
 
 	/*
 	 * Make sure we're passing at least a buffer of response header size.
 	 * If host provided data len is less than the header size, only the
 	 * number of bytes requested by host will be sent to host.
 	 */
-	hdr = kzalloc(alloc_len, GFP_KERNEL);
-	if (!hdr) {
+	down_read(&nvmet_config_sem);
+	alloc_len = sizeof(*hdr) + entry_size * discovery_log_entries(req);
+	buffer = kzalloc(alloc_len, GFP_KERNEL);
+	if (!buffer) {
+		up_read(&nvmet_config_sem);
 		status = NVME_SC_INTERNAL;
 		goto out;
 	}
 
-	down_read(&nvmet_config_sem);
+	hdr = buffer;
 	list_for_each_entry(p, &req->port->subsystems, entry) {
+		char traddr[NVMF_TRADDR_SIZE];
+
 		if (!nvmet_host_allowed(p->subsys, ctrl->hostnqn))
 			continue;
-		if (residual_len >= entry_size) {
-			char traddr[NVMF_TRADDR_SIZE];
-
-			nvmet_set_disc_traddr(req, req->port, traddr);
-			nvmet_format_discovery_entry(hdr, req->port,
-				p->subsys->subsysnqn, traddr,
-				NVME_NQN_NVME, numrec);
-			residual_len -= entry_size;
-		}
+
+		nvmet_set_disc_traddr(req, req->port, traddr);
+		nvmet_format_discovery_entry(hdr, req->port,
+				p->subsys->subsysnqn, traddr,
+				NVME_NQN_NVME, numrec);
 		numrec++;
 	}
 
 	list_for_each_entry(r, &req->port->referrals, entry) {
-		if (residual_len >= entry_size) {
-			nvmet_format_discovery_entry(hdr, r,
-				NVME_DISC_SUBSYS_NAME,
-				r->disc_addr.traddr,
-				NVME_NQN_DISC, numrec);
-			residual_len -= entry_size;
-		}
+		nvmet_format_discovery_entry(hdr, r,
+				NVME_DISC_SUBSYS_NAME,
+				r->disc_addr.traddr,
+				NVME_NQN_DISC, numrec);
 		numrec++;
 	}
 
@@ -190,8 +212,8 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
 
 	up_read(&nvmet_config_sem);
 
-	status = nvmet_copy_to_sgl(req, 0, hdr, data_len);
-	kfree(hdr);
+	status = nvmet_copy_to_sgl(req, 0, buffer + offset, data_len);
+	kfree(buffer);
 out:
 	nvmet_req_complete(req, status);
 }
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 51e49efd7849..1653d19b187f 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -428,6 +428,7 @@ u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf,
 u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len);
 
 u32 nvmet_get_log_page_len(struct nvme_command *cmd);
+u64 nvmet_get_log_page_offset(struct nvme_command *cmd);
 
 extern struct list_head *nvmet_ports;
 void nvmet_port_disc_changed(struct nvmet_port *port,
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 8af01777d09c..f8cb7c23305b 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -793,6 +793,7 @@ static int virtscsi_probe(struct virtio_device *vdev)
 
 	/* We need to know how many queues before we allocate. */
 	num_queues = virtscsi_config_get(vdev, num_queues) ? : 1;
+	num_queues = min_t(unsigned int, nr_cpu_ids, num_queues);
 
 	num_targets = virtscsi_config_get(vdev, max_target) + 1;
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 78d3257435c0..24615c76c1d0 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -307,10 +307,10 @@ static void blkdev_bio_end_io(struct bio *bio)
 	struct blkdev_dio *dio = bio->bi_private;
 	bool should_dirty = dio->should_dirty;
 
-	if (dio->multi_bio && !atomic_dec_and_test(&dio->ref)) {
-		if (bio->bi_status && !dio->bio.bi_status)
-			dio->bio.bi_status = bio->bi_status;
-	} else {
+	if (bio->bi_status && !dio->bio.bi_status)
+		dio->bio.bi_status = bio->bi_status;
+
+	if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
 		if (!dio->is_sync) {
 			struct kiocb *iocb = dio->iocb;
 			ssize_t ret;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 07d6ef195d05..89aa8412b5f5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2245,6 +2245,10 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
 		goto err;
 
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
+		ret = -EPERM;
+		if (!capable(CAP_SYS_ADMIN))
+			goto err;
+
 		if (p->flags & IORING_SETUP_SQ_AFF) {
 			int cpu;
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index bb6090aa165d..e584673c1881 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -120,19 +120,23 @@ static inline bool bio_full(struct bio *bio)
 	return bio->bi_vcnt >= bio->bi_max_vecs;
 }
 
-#define mp_bvec_for_each_segment(bv, bvl, i, iter_all)			\
-	for (bv = bvec_init_iter_all(&iter_all);			\
-		(iter_all.done < (bvl)->bv_len) &&			\
-		(mp_bvec_next_segment((bvl), &iter_all), 1);		\
-		iter_all.done += bv->bv_len, i += 1)
+static inline bool bio_next_segment(const struct bio *bio,
+				    struct bvec_iter_all *iter)
+{
+	if (iter->idx >= bio->bi_vcnt)
+		return false;
+
+	bvec_advance(&bio->bi_io_vec[iter->idx], iter);
+	return true;
+}
 
 /*
  * drivers should _never_ use the all version - the bio may have been split
  * before it got to the driver and the driver won't own all of it
  */
-#define bio_for_each_segment_all(bvl, bio, i, iter_all)			\
-	for (i = 0, iter_all.idx = 0; iter_all.idx < (bio)->bi_vcnt; iter_all.idx++) \
-		mp_bvec_for_each_segment(bvl, &((bio)->bi_io_vec[iter_all.idx]), i, iter_all)
+#define bio_for_each_segment_all(bvl, bio, i, iter)			\
+	for (i = 0, bvl = bvec_init_iter_all(&iter);			\
+	     bio_next_segment((bio), &iter); i++)
 
 static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
 				    unsigned bytes)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index cb2aa7ecafff..db29928de467 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -302,6 +302,7 @@ void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
 void blk_mq_kick_requeue_list(struct request_queue *q);
 void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
 bool blk_mq_complete_request(struct request *rq);
+void blk_mq_complete_request_sync(struct request *rq);
 bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
 		struct bio *bio);
 bool blk_mq_queue_stopped(struct request_queue *q);
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index f6275c4da13a..3bc91879e1e2 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -145,18 +145,18 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv,
 
 static inline struct bio_vec *bvec_init_iter_all(struct bvec_iter_all *iter_all)
 {
-	iter_all->bv.bv_page = NULL;
 	iter_all->done = 0;
+	iter_all->idx = 0;
 
 	return &iter_all->bv;
 }
 
-static inline void mp_bvec_next_segment(const struct bio_vec *bvec,
-					struct bvec_iter_all *iter_all)
+static inline void bvec_advance(const struct bio_vec *bvec,
+				struct bvec_iter_all *iter_all)
 {
 	struct bio_vec *bv = &iter_all->bv;
 
-	if (bv->bv_page) {
+	if (iter_all->done) {
 		bv->bv_page = nth_page(bv->bv_page, 1);
 		bv->bv_offset = 0;
 	} else {
@@ -165,6 +165,12 @@ static inline void mp_bvec_next_segment(const struct bio_vec *bvec,
 	}
 	bv->bv_len = min_t(unsigned int, PAGE_SIZE - bv->bv_offset,
 			   bvec->bv_len - iter_all->done);
+	iter_all->done += bv->bv_len;
+
+	if (iter_all->done == bvec->bv_len) {
+		iter_all->idx++;
+		iter_all->done = 0;
+	}
 }
 
 /*
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index baa49e6a23cc..c40720cb59ac 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -967,8 +967,13 @@ struct nvme_get_log_page_command {
 	__le16			numdl;
 	__le16			numdu;
 	__u16			rsvd11;
-	__le32			lpol;
-	__le32			lpou;
+	union {
+		struct {
+			__le32 lpol;
+			__le32 lpou;
+		};
+		__le64 lpo;
+	};
 	__u32			rsvd14[2];
 };
 
diff --git a/tools/io_uring/io_uring-bench.c b/tools/io_uring/io_uring-bench.c
index 512306a37531..0f257139b003 100644
--- a/tools/io_uring/io_uring-bench.c
+++ b/tools/io_uring/io_uring-bench.c
@@ -32,10 +32,6 @@
32#include "liburing.h" 32#include "liburing.h"
33#include "barrier.h" 33#include "barrier.h"
34 34
35#ifndef IOCQE_FLAG_CACHEHIT
36#define IOCQE_FLAG_CACHEHIT (1U << 0)
37#endif
38
39#define min(a, b) ((a < b) ? (a) : (b)) 35#define min(a, b) ((a < b) ? (a) : (b))
40 36
41struct io_sq_ring { 37struct io_sq_ring {
@@ -85,7 +81,6 @@ struct submitter {
 	unsigned long reaps;
 	unsigned long done;
 	unsigned long calls;
-	unsigned long cachehit, cachemiss;
 	volatile int finish;
 
 	__s32 *fds;
@@ -270,10 +265,6 @@ static int reap_events(struct submitter *s)
 				return -1;
 			}
 		}
-		if (cqe->flags & IOCQE_FLAG_CACHEHIT)
-			s->cachehit++;
-		else
-			s->cachemiss++;
 		reaped++;
 		head++;
 	} while (1);
@@ -489,7 +480,7 @@ static void file_depths(char *buf)
 int main(int argc, char *argv[])
 {
 	struct submitter *s = &submitters[0];
-	unsigned long done, calls, reap, cache_hit, cache_miss;
+	unsigned long done, calls, reap;
 	int err, i, flags, fd;
 	char *fdepths;
 	void *ret;
@@ -569,44 +560,29 @@ int main(int argc, char *argv[])
 	pthread_create(&s->thread, NULL, submitter_fn, s);
 
 	fdepths = malloc(8 * s->nr_files);
-	cache_hit = cache_miss = reap = calls = done = 0;
+	reap = calls = done = 0;
 	do {
 		unsigned long this_done = 0;
 		unsigned long this_reap = 0;
 		unsigned long this_call = 0;
-		unsigned long this_cache_hit = 0;
-		unsigned long this_cache_miss = 0;
 		unsigned long rpc = 0, ipc = 0;
-		double hit = 0.0;
 
 		sleep(1);
 		this_done += s->done;
 		this_call += s->calls;
 		this_reap += s->reaps;
-		this_cache_hit += s->cachehit;
-		this_cache_miss += s->cachemiss;
-		if (this_cache_hit && this_cache_miss) {
-			unsigned long hits, total;
-
-			hits = this_cache_hit - cache_hit;
-			total = hits + this_cache_miss - cache_miss;
-			hit = (double) hits / (double) total;
-			hit *= 100.0;
-		}
 		if (this_call - calls) {
 			rpc = (this_done - done) / (this_call - calls);
 			ipc = (this_reap - reap) / (this_call - calls);
 		} else
 			rpc = ipc = -1;
 		file_depths(fdepths);
-		printf("IOPS=%lu, IOS/call=%ld/%ld, inflight=%u (%s), Cachehit=%0.2f%%\n",
+		printf("IOPS=%lu, IOS/call=%ld/%ld, inflight=%u (%s)\n",
 			this_done - done, rpc, ipc, s->inflight,
-			fdepths, hit);
+			fdepths);
 		done = this_done;
 		calls = this_call;
 		reap = this_reap;
-		cache_hit = s->cachehit;
-		cache_miss = s->cachemiss;
 	} while (!finish);
 
 	pthread_join(s->thread, &ret);