author     Christoph Hellwig <hch@lst.de>    2016-12-08 17:20:32 -0500
committer  Jens Axboe <axboe@fb.com>         2016-12-09 10:30:51 -0500
commit     f9d03f96b988002027d4b28ea1b7a24729a4c9b5 (patch)
tree       e4995a30bbe58290594e2ef29de5ae2785c6c9db
parent     be07e14f96e3121483339a64d917fddb3b86ba98 (diff)
block: improve handling of the magic discard payload
Instead of allocating a single unused biovec for discard requests, send them down without any payload. Instead we allow the driver to add a "special" payload using a biovec embedded into struct request (unioned over other fields never used while in the driver), and overloading the number of segments for this case.

This has a couple of advantages:

 - we don't have to allocate the bio_vec
 - the amount of special casing for discard requests in the block layer is significantly reduced
 - using this same scheme for other request types is trivial, which will be important for implementing the new WRITE_ZEROES op on devices where it actually requires a payload (e.g. SCSI)
 - we can get rid of playing games with the request length, as we'll never touch it and completions will work just fine
 - it will allow us to support ranged discard operations in the future by merging non-contiguous discard bios into a single request
 - last but not least it removes a lot of code

This patch is the common base for my WIP series for ranged discards and for removing discard_zeroes_data in favor of always using REQ_OP_WRITE_ZEROES, so it would be good to get it in quickly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
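In driver terms the mechanism described above comes down to pointing the request at a driver-owned buffer and setting the new flag. A minimal sketch, assuming a hypothetical setup_discard_payload() helper and simply mirroring the nvme_setup_discard() and sd_setup_discard_cmnd() hunks below:

    /* Sketch only, not part of the patch: attach a driver-owned discard payload. */
    static int setup_discard_payload(struct request *req, void *payload,
                                     unsigned int len)
    {
            req->special_vec.bv_page = virt_to_page(payload);
            req->special_vec.bv_offset = offset_in_page(payload);
            req->special_vec.bv_len = len;
            /* Have the block layer map ->special_vec instead of the bio chain. */
            req->rq_flags |= RQF_SPECIAL_PAYLOAD;
            return 0;
    }

With RQF_SPECIAL_PAYLOAD set, blk_rq_nr_phys_segments() reports a single segment and blk_rq_map_sg() maps exactly this vector; the driver remains responsible for freeing the buffer on completion (see nvme_cleanup_cmd() and sd_uninit_command() below).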
-rw-r--r--   block/bio.c                  10
-rw-r--r--   block/blk-core.c             34
-rw-r--r--   block/blk-lib.c               2
-rw-r--r--   block/blk-merge.c            53
-rw-r--r--   drivers/nvme/host/core.c     17
-rw-r--r--   drivers/nvme/host/nvme.h      6
-rw-r--r--   drivers/nvme/host/pci.c      27
-rw-r--r--   drivers/nvme/host/rdma.c     13
-rw-r--r--   drivers/nvme/target/loop.c    4
-rw-r--r--   drivers/scsi/scsi_lib.c       6
-rw-r--r--   drivers/scsi/sd.c            24
-rw-r--r--   include/linux/bio.h           3
-rw-r--r--   include/linux/blkdev.h       15
13 files changed, 76 insertions, 138 deletions
diff --git a/block/bio.c b/block/bio.c
index 83db1f37fd0b..2b375020fc49 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1840,15 +1840,7 @@ struct bio *bio_split(struct bio *bio, int sectors,
         BUG_ON(sectors <= 0);
         BUG_ON(sectors >= bio_sectors(bio));
 
-        /*
-         * Discards need a mutable bio_vec to accommodate the payload
-         * required by the DSM TRIM and UNMAP commands.
-         */
-        if (bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE)
-                split = bio_clone_bioset(bio, gfp, bs);
-        else
-                split = bio_clone_fast(bio, gfp, bs);
-
+        split = bio_clone_fast(bio, gfp, bs);
         if (!split)
                 return NULL;
 
diff --git a/block/blk-core.c b/block/blk-core.c
index 4b7ec5958055..bd642a43b98b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1475,38 +1475,6 @@ void blk_put_request(struct request *req)
 }
 EXPORT_SYMBOL(blk_put_request);
 
-/**
- * blk_add_request_payload - add a payload to a request
- * @rq: request to update
- * @page: page backing the payload
- * @offset: offset in page
- * @len: length of the payload.
- *
- * This allows to later add a payload to an already submitted request by
- * a block driver.  The driver needs to take care of freeing the payload
- * itself.
- *
- * Note that this is a quite horrible hack and nothing but handling of
- * discard requests should ever use it.
- */
-void blk_add_request_payload(struct request *rq, struct page *page,
-                int offset, unsigned int len)
-{
-        struct bio *bio = rq->bio;
-
-        bio->bi_io_vec->bv_page = page;
-        bio->bi_io_vec->bv_offset = offset;
-        bio->bi_io_vec->bv_len = len;
-
-        bio->bi_iter.bi_size = len;
-        bio->bi_vcnt = 1;
-        bio->bi_phys_segments = 1;
-
-        rq->__data_len = rq->resid_len = len;
-        rq->nr_phys_segments = 1;
-}
-EXPORT_SYMBOL_GPL(blk_add_request_payload);
-
 bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
                             struct bio *bio)
 {
@@ -2642,6 +2610,8 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
                 return false;
         }
 
+        WARN_ON_ONCE(req->rq_flags & RQF_SPECIAL_PAYLOAD);
+
         req->__data_len -= total_bytes;
 
         /* update sector only for requests with clear definition of sector */
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 510a6fb15318..ed89c8f4b2a0 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -80,7 +80,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
                         req_sects = end_sect - sector;
                 }
 
-                bio = next_bio(bio, 1, gfp_mask);
+                bio = next_bio(bio, 0, gfp_mask);
                 bio->bi_iter.bi_sector = sector;
                 bio->bi_bdev = bdev;
                 bio_set_op_attrs(bio, op, 0);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 1002afdfee99..182398cb1524 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -241,18 +241,13 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
         if (!bio)
                 return 0;
 
-        /*
-         * This should probably be returning 0, but blk_add_request_payload()
-         * (Christoph!!!!)
-         */
         switch (bio_op(bio)) {
         case REQ_OP_DISCARD:
         case REQ_OP_SECURE_ERASE:
-        case REQ_OP_WRITE_SAME:
         case REQ_OP_WRITE_ZEROES:
+                return 0;
+        case REQ_OP_WRITE_SAME:
                 return 1;
-        default:
-                break;
         }
 
         fbio = bio;
@@ -410,39 +405,21 @@ new_segment:
                 *bvprv = *bvec;
 }
 
+static inline int __blk_bvec_map_sg(struct request_queue *q, struct bio_vec bv,
+                struct scatterlist *sglist, struct scatterlist **sg)
+{
+        *sg = sglist;
+        sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset);
+        return 1;
+}
+
 static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
                              struct scatterlist *sglist,
                              struct scatterlist **sg)
 {
         struct bio_vec bvec, bvprv = { NULL };
         struct bvec_iter iter;
-        int nsegs, cluster;
-
-        nsegs = 0;
-        cluster = blk_queue_cluster(q);
-
-        switch (bio_op(bio)) {
-        case REQ_OP_DISCARD:
-        case REQ_OP_SECURE_ERASE:
-        case REQ_OP_WRITE_ZEROES:
-                /*
-                 * This is a hack - drivers should be neither modifying the
-                 * biovec, nor relying on bi_vcnt - but because of
-                 * blk_add_request_payload(), a discard bio may or may not have
-                 * a payload we need to set up here (thank you Christoph) and
-                 * bi_vcnt is really the only way of telling if we need to.
-                 */
-                if (!bio->bi_vcnt)
-                        return 0;
-                /* Fall through */
-        case REQ_OP_WRITE_SAME:
-                *sg = sglist;
-                bvec = bio_iovec(bio);
-                sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
-                return 1;
-        default:
-                break;
-        }
+        int cluster = blk_queue_cluster(q), nsegs = 0;
 
         for_each_bio(bio)
                 bio_for_each_segment(bvec, bio, iter)
@@ -462,7 +439,11 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
         struct scatterlist *sg = NULL;
         int nsegs = 0;
 
-        if (rq->bio)
+        if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
+                nsegs = __blk_bvec_map_sg(q, rq->special_vec, sglist, &sg);
+        else if (rq->bio && bio_op(rq->bio) == REQ_OP_WRITE_SAME)
+                nsegs = __blk_bvec_map_sg(q, bio_iovec(rq->bio), sglist, &sg);
+        else if (rq->bio)
                 nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg);
 
         if (unlikely(rq->rq_flags & RQF_COPY_USER) &&
@@ -495,7 +476,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
          * Something must have been wrong if the figured number of
          * segment is bigger than number of req's physical segments
          */
-        WARN_ON(nsegs > rq->nr_phys_segments);
+        WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));
 
         return nsegs;
 }
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 1b48514fbe99..3b1d6478dcfb 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -239,8 +239,6 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
                 struct nvme_command *cmnd)
 {
         struct nvme_dsm_range *range;
-        struct page *page;
-        int offset;
         unsigned int nr_bytes = blk_rq_bytes(req);
 
         range = kmalloc(sizeof(*range), GFP_ATOMIC);
@@ -257,17 +255,10 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
         cmnd->dsm.nr = 0;
         cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
 
-        req->completion_data = range;
-        page = virt_to_page(range);
-        offset = offset_in_page(range);
-        blk_add_request_payload(req, page, offset, sizeof(*range));
-
-        /*
-         * we set __data_len back to the size of the area to be discarded
-         * on disk. This allows us to report completion on the full amount
-         * of blocks described by the request.
-         */
-        req->__data_len = nr_bytes;
+        req->special_vec.bv_page = virt_to_page(range);
+        req->special_vec.bv_offset = offset_in_page(range);
+        req->special_vec.bv_len = sizeof(*range);
+        req->rq_flags |= RQF_SPECIAL_PAYLOAD;
 
         return BLK_MQ_RQ_QUEUE_OK;
 }
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a3d6ffd874af..bd5321441d12 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -236,8 +236,10 @@ static inline unsigned nvme_map_len(struct request *rq)
 
 static inline void nvme_cleanup_cmd(struct request *req)
 {
-        if (req_op(req) == REQ_OP_DISCARD)
-                kfree(req->completion_data);
+        if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
+                kfree(page_address(req->special_vec.bv_page) +
+                        req->special_vec.bv_offset);
+        }
 }
 
 static inline int nvme_error_status(u16 status)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 82b9b3f1f21d..717d6ea47ee4 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -302,14 +302,14 @@ static void __nvme_submit_cmd(struct nvme_queue *nvmeq,
 static __le64 **iod_list(struct request *req)
 {
         struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-        return (__le64 **)(iod->sg + req->nr_phys_segments);
+        return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req));
 }
 
 static int nvme_init_iod(struct request *rq, unsigned size,
                 struct nvme_dev *dev)
 {
         struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
-        int nseg = rq->nr_phys_segments;
+        int nseg = blk_rq_nr_phys_segments(rq);
 
         if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
                 iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC);
@@ -339,8 +339,6 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
         __le64 **list = iod_list(req);
         dma_addr_t prp_dma = iod->first_dma;
 
-        nvme_cleanup_cmd(req);
-
         if (iod->npages == 0)
                 dma_pool_free(dev->prp_small_pool, list[0], prp_dma);
         for (i = 0; i < iod->npages; i++) {
@@ -510,7 +508,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
                         DMA_TO_DEVICE : DMA_FROM_DEVICE;
         int ret = BLK_MQ_RQ_QUEUE_ERROR;
 
-        sg_init_table(iod->sg, req->nr_phys_segments);
+        sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
         iod->nents = blk_rq_map_sg(q, req, iod->sg);
         if (!iod->nents)
                 goto out;
@@ -566,6 +564,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
                 }
         }
 
+        nvme_cleanup_cmd(req);
         nvme_free_iod(dev, req);
 }
 
@@ -596,20 +595,20 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
                 }
         }
 
-        map_len = nvme_map_len(req);
-        ret = nvme_init_iod(req, map_len, dev);
+        ret = nvme_setup_cmd(ns, req, &cmnd);
         if (ret != BLK_MQ_RQ_QUEUE_OK)
                 return ret;
 
-        ret = nvme_setup_cmd(ns, req, &cmnd);
+        map_len = nvme_map_len(req);
+        ret = nvme_init_iod(req, map_len, dev);
         if (ret != BLK_MQ_RQ_QUEUE_OK)
-                goto out;
+                goto out_free_cmd;
 
-        if (req->nr_phys_segments)
+        if (blk_rq_nr_phys_segments(req))
                 ret = nvme_map_data(dev, req, map_len, &cmnd);
 
         if (ret != BLK_MQ_RQ_QUEUE_OK)
-                goto out;
+                goto out_cleanup_iod;
 
         blk_mq_start_request(req);
 
@@ -620,14 +619,16 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
                 else
                         ret = BLK_MQ_RQ_QUEUE_ERROR;
                 spin_unlock_irq(&nvmeq->q_lock);
-                goto out;
+                goto out_cleanup_iod;
         }
         __nvme_submit_cmd(nvmeq, &cmnd);
         nvme_process_cq(nvmeq);
         spin_unlock_irq(&nvmeq->q_lock);
         return BLK_MQ_RQ_QUEUE_OK;
-out:
+out_cleanup_iod:
         nvme_free_iod(dev, req);
+out_free_cmd:
+        nvme_cleanup_cmd(req);
         return ret;
 }
 
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index b037d0cb2a7e..251101bf982f 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -952,8 +952,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
         struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
         struct nvme_rdma_device *dev = queue->device;
         struct ib_device *ibdev = dev->dev;
-        int nents, count;
-        int ret;
+        int count, ret;
 
         req->num_sge = 1;
         req->inline_data = false;
@@ -965,16 +964,14 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
                 return nvme_rdma_set_sg_null(c);
 
         req->sg_table.sgl = req->first_sgl;
-        ret = sg_alloc_table_chained(&req->sg_table, rq->nr_phys_segments,
-                        req->sg_table.sgl);
+        ret = sg_alloc_table_chained(&req->sg_table,
+                        blk_rq_nr_phys_segments(rq), req->sg_table.sgl);
         if (ret)
                 return -ENOMEM;
 
-        nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl);
-        BUG_ON(nents > rq->nr_phys_segments);
-        req->nents = nents;
+        req->nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl);
 
-        count = ib_dma_map_sg(ibdev, req->sg_table.sgl, nents,
+        count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents,
                     rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
         if (unlikely(count <= 0)) {
                 sg_free_table_chained(&req->sg_table, true);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 57ded6b3ed8a..9aaa70071ae5 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -185,13 +185,13 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
         if (blk_rq_bytes(req)) {
                 iod->sg_table.sgl = iod->first_sgl;
                 ret = sg_alloc_table_chained(&iod->sg_table,
-                        req->nr_phys_segments, iod->sg_table.sgl);
+                        blk_rq_nr_phys_segments(req),
+                        iod->sg_table.sgl);
                 if (ret)
                         return BLK_MQ_RQ_QUEUE_BUSY;
 
                 iod->req.sg = iod->sg_table.sgl;
                 iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
-                BUG_ON(iod->req.sg_cnt > req->nr_phys_segments);
         }
 
         blk_mq_start_request(req);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 47a5c8783b89..9a8ccff1121f 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1007,8 +1007,8 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb)
         /*
          * If sg table allocation fails, requeue request later.
          */
-        if (unlikely(sg_alloc_table_chained(&sdb->table, req->nr_phys_segments,
-                                        sdb->table.sgl)))
+        if (unlikely(sg_alloc_table_chained(&sdb->table,
+                        blk_rq_nr_phys_segments(req), sdb->table.sgl)))
                 return BLKPREP_DEFER;
 
         /*
@@ -1040,7 +1040,7 @@ int scsi_init_io(struct scsi_cmnd *cmd)
         bool is_mq = (rq->mq_ctx != NULL);
         int error;
 
-        BUG_ON(!rq->nr_phys_segments);
+        BUG_ON(!blk_rq_nr_phys_segments(rq));
 
         error = scsi_init_sgtable(rq, &cmd->sdb);
         if (error)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 65738b0aad36..079c2d9759fb 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -716,7 +716,6 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
         struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
         sector_t sector = blk_rq_pos(rq);
         unsigned int nr_sectors = blk_rq_sectors(rq);
-        unsigned int nr_bytes = blk_rq_bytes(rq);
         unsigned int len;
         int ret;
         char *buf;
@@ -772,24 +771,19 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
                 goto out;
         }
 
-        rq->completion_data = page;
         rq->timeout = SD_TIMEOUT;
 
         cmd->transfersize = len;
         cmd->allowed = SD_MAX_RETRIES;
 
-        /*
-         * Initially __data_len is set to the amount of data that needs to be
-         * transferred to the target. This amount depends on whether WRITE SAME
-         * or UNMAP is being used. After the scatterlist has been mapped by
-         * scsi_init_io() we set __data_len to the size of the area to be
-         * discarded on disk. This allows us to report completion on the full
-         * amount of blocks described by the request.
-         */
-        blk_add_request_payload(rq, page, 0, len);
-        ret = scsi_init_io(cmd);
-        rq->__data_len = nr_bytes;
+        rq->special_vec.bv_page = page;
+        rq->special_vec.bv_offset = 0;
+        rq->special_vec.bv_len = len;
+
+        rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
+        rq->resid_len = len;
 
+        ret = scsi_init_io(cmd);
 out:
         if (ret != BLKPREP_OK)
                 __free_page(page);
@@ -1182,8 +1176,8 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt)
 {
         struct request *rq = SCpnt->request;
 
-        if (req_op(rq) == REQ_OP_DISCARD)
-                __free_page(rq->completion_data);
+        if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
+                __free_page(rq->special_vec.bv_page);
 
         if (SCpnt->cmnd != rq->cmd) {
                 mempool_free(SCpnt->cmnd, sd_cdb_pool);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b15323934a29..7cf8a6c70a3f 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -197,8 +197,9 @@ static inline unsigned bio_segments(struct bio *bio)
         switch (bio_op(bio)) {
         case REQ_OP_DISCARD:
         case REQ_OP_SECURE_ERASE:
-        case REQ_OP_WRITE_SAME:
         case REQ_OP_WRITE_ZEROES:
+                return 0;
+        case REQ_OP_WRITE_SAME:
                 return 1;
         default:
                 break;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ebeef2b79c5a..c5393766909d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -120,10 +120,13 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_HASHED              ((__force req_flags_t)(1 << 16))
 /* IO stats tracking on */
 #define RQF_STATS               ((__force req_flags_t)(1 << 17))
+/* Look at ->special_vec for the actual data payload instead of the
+   bio chain. */
+#define RQF_SPECIAL_PAYLOAD     ((__force req_flags_t)(1 << 18))
 
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \
-        (RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ)
+        (RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
 
 #define BLK_MAX_CDB     16
 
@@ -175,6 +178,7 @@ struct request {
          */
         union {
                 struct rb_node rb_node; /* sort/lookup */
+                struct bio_vec special_vec;
                 void *completion_data;
         };
 
@@ -909,8 +913,6 @@ extern void __blk_put_request(struct request_queue *, struct request *);
 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
 extern void blk_rq_set_block_pc(struct request *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
-extern void blk_add_request_payload(struct request *rq, struct page *page,
-                int offset, unsigned int len);
 extern int blk_lld_busy(struct request_queue *q);
 extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
                              struct bio_set *bs, gfp_t gfp_mask,
@@ -1153,6 +1155,13 @@ extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
 extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 
+static inline unsigned short blk_rq_nr_phys_segments(struct request *rq)
+{
+        if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
+                return 1;
+        return rq->nr_phys_segments;
+}
+
 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
 extern void blk_dump_rq_flags(struct request *, char *);
 extern long nr_blockdev_pages(void);