path: root/drivers/scsi/scsi_lib.c
author	Christoph Hellwig <hch@lst.de>	2014-01-17 06:06:53 -0500
committer	Christoph Hellwig <hch@lst.de>	2014-07-25 17:16:28 -0400
commit	d285203cf647d7c97db3a1c33794315c9008593f (patch)
tree	a28a0a902cf3f467e326a02f5632b917b75c56a3 /drivers/scsi/scsi_lib.c
parent	c53c6d6a68b13b1dff2892551b56cfdc07887d9e (diff)
scsi: add support for a blk-mq based I/O path.
This patch adds support for an alternate I/O path in the scsi midlayer which uses the blk-mq infrastructure instead of the legacy request code. Use of blk-mq is fully transparent to drivers, although for now a host template field is provided to opt out of blk-mq usage in case any unforeseen incompatibilities arise.

In general, replacing the legacy request code with blk-mq is a simple and mostly mechanical transformation. The biggest exception is the new code that deals with the fact that I/O submissions in blk-mq must happen from process context, which slightly complicates the I/O completion handler. The second biggest difference is that blk-mq is built around the concept of preallocated requests that also include driver-specific data, which in the SCSI context means the scsi_cmnd structure. This completely avoids dynamic memory allocations for the fast path through I/O submission.

Due to the preallocated requests, the MQ code path exclusively uses the host-wide shared tag allocator instead of a per-LUN one. This only affects drivers actually using the block layer provided tag allocator instead of their own. Unlike the old path, blk-mq always provides a tag, although drivers don't have to use it.

For now the blk-mq path is disabled by default and must be enabled using the "use_blk_mq" module parameter. Once the remaining work in the block layer to make blk-mq more suitable for slow devices is complete I hope to make it the default and eventually even remove the old code path.

Based on the earlier scsi-mq prototype by Nicholas Bellinger.

Thanks to Bart Van Assche and Robert Elliott for testing, benchmarking and various suggestions and code contributions.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Webb Scales <webbnh@hp.com>
Acked-by: Jens Axboe <axboe@kernel.dk>
Tested-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Robert Elliott <elliott@hp.com>
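[Editor's note: the opt-in switch itself is wired up outside scsi_lib.c, so it does not appear in the diff below. A minimal sketch of how the "use_blk_mq" module parameter and the per-host-template opt-out described above fit together; the names scsi_use_blk_mq and disable_blk_mq are assumptions based on this description, not taken from this file's diff:]

	#include <linux/module.h>
	#include <scsi/scsi_host.h>

	/* Sketch only: module parameter gating the new I/O path globally. */
	static bool scsi_use_blk_mq = false;
	module_param_named(use_blk_mq, scsi_use_blk_mq, bool, S_IWUSR | S_IRUGO);

	static void scsi_pick_io_path(struct Scsi_Host *shost)
	{
		/*
		 * blk-mq is used only when enabled globally and the LLD's
		 * host template has not opted out via its escape-hatch flag.
		 */
		shost->use_blk_mq = scsi_use_blk_mq && !shost->hostt->disable_blk_mq;
	}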
Diffstat (limited to 'drivers/scsi/scsi_lib.c')
-rw-r--r--	drivers/scsi/scsi_lib.c	464
1 file changed, 403 insertions(+), 61 deletions(-)
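[Editor's note: the per-command preallocation described in the commit message shows up below in scsi_mq_setup_tags() and scsi_mq_prep_fn(). As a reading aid, a hedged sketch of the blk-mq PDU layout those functions imply; the helper name scsi_mq_cmd_sgl is hypothetical and only mirrors the pointer arithmetic in the patch:]

	/*
	 * Layout of each preallocated blk-mq request PDU (cmd_size bytes),
	 * as computed by scsi_mq_setup_tags() in the diff below:
	 *
	 *   struct scsi_cmnd
	 *   hostt->cmd_size bytes of LLD-private data
	 *   sg_tablesize * sizeof(struct scatterlist)   <- cmd->sdb.table.sgl
	 *   struct scsi_data_buffer (protection only)   <- cmd->prot_sdb
	 *   sg_tablesize * sizeof(struct scatterlist)   <- prot_sdb->table.sgl
	 */
	static inline struct scatterlist *scsi_mq_cmd_sgl(struct Scsi_Host *shost,
							  struct scsi_cmnd *cmd)
	{
		/* Same arithmetic scsi_mq_prep_fn() uses for the inline SG list. */
		return (void *)cmd + sizeof(struct scsi_cmnd) + shost->hostt->cmd_size;
	}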
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index bbd7a0a08692..9c44392b748f 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1,5 +1,6 @@
 /*
- * scsi_lib.c Copyright (C) 1999 Eric Youngdale
+ * Copyright (C) 1999 Eric Youngdale
+ * Copyright (C) 2014 Christoph Hellwig
  *
  * SCSI queueing library.
  * Initial versions: Eric Youngdale (eric@andante.org).
@@ -20,6 +21,7 @@
 #include <linux/delay.h>
 #include <linux/hardirq.h>
 #include <linux/scatterlist.h>
+#include <linux/blk-mq.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
@@ -113,6 +115,16 @@ scsi_set_blocked(struct scsi_cmnd *cmd, int reason)
 	}
 }
 
+static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd)
+{
+	struct scsi_device *sdev = cmd->device;
+	struct request_queue *q = cmd->request->q;
+
+	blk_mq_requeue_request(cmd->request);
+	blk_mq_kick_requeue_list(q);
+	put_device(&sdev->sdev_gendev);
+}
+
 /**
  * __scsi_queue_insert - private queue insertion
  * @cmd: The SCSI command being requeued
@@ -150,6 +162,10 @@ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, int unbusy)
 	 * before blk_cleanup_queue() finishes.
 	 */
 	cmd->result = 0;
+	if (q->mq_ops) {
+		scsi_mq_requeue_cmd(cmd);
+		return;
+	}
 	spin_lock_irqsave(q->queue_lock, flags);
 	blk_requeue_request(q, cmd->request);
 	kblockd_schedule_work(&device->requeue_work);
@@ -308,6 +324,14 @@ void scsi_device_unbusy(struct scsi_device *sdev)
 	atomic_dec(&sdev->device_busy);
 }
 
+static void scsi_kick_queue(struct request_queue *q)
+{
+	if (q->mq_ops)
+		blk_mq_start_hw_queues(q);
+	else
+		blk_run_queue(q);
+}
+
 /*
  * Called for single_lun devices on IO completion. Clear starget_sdev_user,
  * and call blk_run_queue for all the scsi_devices on the target -
@@ -332,7 +356,7 @@ static void scsi_single_lun_run(struct scsi_device *current_sdev)
 	 * but in most cases, we will be first. Ideally, each LU on the
 	 * target would get some limited time or requests on the target.
 	 */
-	blk_run_queue(current_sdev->request_queue);
+	scsi_kick_queue(current_sdev->request_queue);
 
 	spin_lock_irqsave(shost->host_lock, flags);
 	if (starget->starget_sdev_user)
@@ -345,7 +369,7 @@ static void scsi_single_lun_run(struct scsi_device *current_sdev)
 			continue;
 
 		spin_unlock_irqrestore(shost->host_lock, flags);
-		blk_run_queue(sdev->request_queue);
+		scsi_kick_queue(sdev->request_queue);
 		spin_lock_irqsave(shost->host_lock, flags);
 
 		scsi_device_put(sdev);
@@ -435,7 +459,7 @@ static void scsi_starved_list_run(struct Scsi_Host *shost)
 			continue;
 		spin_unlock_irqrestore(shost->host_lock, flags);
 
-		blk_run_queue(slq);
+		scsi_kick_queue(slq);
 		blk_put_queue(slq);
 
 		spin_lock_irqsave(shost->host_lock, flags);
@@ -466,7 +490,10 @@ static void scsi_run_queue(struct request_queue *q)
 	if (!list_empty(&sdev->host->starved_list))
 		scsi_starved_list_run(sdev->host);
 
-	blk_run_queue(q);
+	if (q->mq_ops)
+		blk_mq_start_stopped_hw_queues(q, false);
+	else
+		blk_run_queue(q);
 }
 
 void scsi_requeue_run_queue(struct work_struct *work)
@@ -564,25 +591,72 @@ static struct scatterlist *scsi_sg_alloc(unsigned int nents, gfp_t gfp_mask)
 	return mempool_alloc(sgp->pool, gfp_mask);
 }
 
-static void scsi_free_sgtable(struct scsi_data_buffer *sdb)
+static void scsi_free_sgtable(struct scsi_data_buffer *sdb, bool mq)
 {
-	__sg_free_table(&sdb->table, SCSI_MAX_SG_SEGMENTS, false, scsi_sg_free);
+	if (mq && sdb->table.nents <= SCSI_MAX_SG_SEGMENTS)
+		return;
+	__sg_free_table(&sdb->table, SCSI_MAX_SG_SEGMENTS, mq, scsi_sg_free);
 }
 
 static int scsi_alloc_sgtable(struct scsi_data_buffer *sdb, int nents,
-		gfp_t gfp_mask)
+		gfp_t gfp_mask, bool mq)
 {
+	struct scatterlist *first_chunk = NULL;
 	int ret;
 
 	BUG_ON(!nents);
 
+	if (mq) {
+		if (nents <= SCSI_MAX_SG_SEGMENTS) {
+			sdb->table.nents = nents;
+			sg_init_table(sdb->table.sgl, sdb->table.nents);
+			return 0;
+		}
+		first_chunk = sdb->table.sgl;
+	}
+
 	ret = __sg_alloc_table(&sdb->table, nents, SCSI_MAX_SG_SEGMENTS,
-			       NULL, gfp_mask, scsi_sg_alloc);
+			       first_chunk, gfp_mask, scsi_sg_alloc);
 	if (unlikely(ret))
-		scsi_free_sgtable(sdb);
+		scsi_free_sgtable(sdb, mq);
 	return ret;
 }
 
+static void scsi_uninit_cmd(struct scsi_cmnd *cmd)
+{
+	if (cmd->request->cmd_type == REQ_TYPE_FS) {
+		struct scsi_driver *drv = scsi_cmd_to_driver(cmd);
+
+		if (drv->uninit_command)
+			drv->uninit_command(cmd);
+	}
+}
+
+static void scsi_mq_free_sgtables(struct scsi_cmnd *cmd)
+{
+	if (cmd->sdb.table.nents)
+		scsi_free_sgtable(&cmd->sdb, true);
+	if (cmd->request->next_rq && cmd->request->next_rq->special)
+		scsi_free_sgtable(cmd->request->next_rq->special, true);
+	if (scsi_prot_sg_count(cmd))
+		scsi_free_sgtable(cmd->prot_sdb, true);
+}
+
+static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd)
+{
+	struct scsi_device *sdev = cmd->device;
+	unsigned long flags;
+
+	BUG_ON(list_empty(&cmd->list));
+
+	scsi_mq_free_sgtables(cmd);
+	scsi_uninit_cmd(cmd);
+
+	spin_lock_irqsave(&sdev->list_lock, flags);
+	list_del_init(&cmd->list);
+	spin_unlock_irqrestore(&sdev->list_lock, flags);
+}
+
 /*
  * Function: scsi_release_buffers()
  *
@@ -602,19 +676,19 @@ static int scsi_alloc_sgtable(struct scsi_data_buffer *sdb, int nents,
 static void scsi_release_buffers(struct scsi_cmnd *cmd)
 {
 	if (cmd->sdb.table.nents)
-		scsi_free_sgtable(&cmd->sdb);
+		scsi_free_sgtable(&cmd->sdb, false);
 
 	memset(&cmd->sdb, 0, sizeof(cmd->sdb));
 
 	if (scsi_prot_sg_count(cmd))
-		scsi_free_sgtable(cmd->prot_sdb);
+		scsi_free_sgtable(cmd->prot_sdb, false);
 }
 
 static void scsi_release_bidi_buffers(struct scsi_cmnd *cmd)
 {
 	struct scsi_data_buffer *bidi_sdb = cmd->request->next_rq->special;
 
-	scsi_free_sgtable(bidi_sdb);
+	scsi_free_sgtable(bidi_sdb, false);
 	kmem_cache_free(scsi_sdb_cache, bidi_sdb);
 	cmd->request->next_rq->special = NULL;
 }
@@ -625,8 +699,6 @@ static bool scsi_end_request(struct request *req, int error,
 	struct scsi_cmnd *cmd = req->special;
 	struct scsi_device *sdev = cmd->device;
 	struct request_queue *q = sdev->request_queue;
-	unsigned long flags;
-
 
 	if (blk_update_request(req, error, bytes))
 		return true;
@@ -639,14 +711,38 @@ static bool scsi_end_request(struct request *req, int error,
 	if (blk_queue_add_random(q))
 		add_disk_randomness(req->rq_disk);
 
-	spin_lock_irqsave(q->queue_lock, flags);
-	blk_finish_request(req, error);
-	spin_unlock_irqrestore(q->queue_lock, flags);
+	if (req->mq_ctx) {
+		/*
+		 * In the MQ case the command gets freed by __blk_mq_end_io,
+		 * so we have to do all cleanup that depends on it earlier.
+		 *
+		 * We also can't kick the queues from irq context, so we
+		 * will have to defer it to a workqueue.
+		 */
+		scsi_mq_uninit_cmd(cmd);
+
+		__blk_mq_end_io(req, error);
+
+		if (scsi_target(sdev)->single_lun ||
+		    !list_empty(&sdev->host->starved_list))
+			kblockd_schedule_work(&sdev->requeue_work);
+		else
+			blk_mq_start_stopped_hw_queues(q, true);
+
+		put_device(&sdev->sdev_gendev);
+	} else {
+		unsigned long flags;
+
+		spin_lock_irqsave(q->queue_lock, flags);
+		blk_finish_request(req, error);
+		spin_unlock_irqrestore(q->queue_lock, flags);
+
+		if (bidi_bytes)
+			scsi_release_bidi_buffers(cmd);
+		scsi_release_buffers(cmd);
+		scsi_next_command(cmd);
+	}
 
-	if (bidi_bytes)
-		scsi_release_bidi_buffers(cmd);
-	scsi_release_buffers(cmd);
-	scsi_next_command(cmd);
 	return false;
 }
 
@@ -953,8 +1049,14 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 		/* Unprep the request and put it back at the head of the queue.
 		 * A new command will be prepared and issued.
 		 */
-		scsi_release_buffers(cmd);
-		scsi_requeue_command(q, cmd);
+		if (q->mq_ops) {
+			cmd->request->cmd_flags &= ~REQ_DONTPREP;
+			scsi_mq_uninit_cmd(cmd);
+			scsi_mq_requeue_cmd(cmd);
+		} else {
+			scsi_release_buffers(cmd);
+			scsi_requeue_command(q, cmd);
+		}
 		break;
 	case ACTION_RETRY:
 		/* Retry the same command immediately */
@@ -976,9 +1078,8 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb,
 	 * If sg table allocation fails, requeue request later.
 	 */
 	if (unlikely(scsi_alloc_sgtable(sdb, req->nr_phys_segments,
-					gfp_mask))) {
+					gfp_mask, req->mq_ctx != NULL)))
 		return BLKPREP_DEFER;
-	}
 
 	/*
 	 * Next, walk the list, and fill in the addresses and sizes of
@@ -1006,6 +1107,7 @@ int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask)
 {
 	struct scsi_device *sdev = cmd->device;
 	struct request *rq = cmd->request;
+	bool is_mq = (rq->mq_ctx != NULL);
 	int error;
 
 	BUG_ON(!rq->nr_phys_segments);
@@ -1015,15 +1117,19 @@ int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask)
 		goto err_exit;
 
 	if (blk_bidi_rq(rq)) {
-		struct scsi_data_buffer *bidi_sdb = kmem_cache_zalloc(
-			scsi_sdb_cache, GFP_ATOMIC);
-		if (!bidi_sdb) {
-			error = BLKPREP_DEFER;
-			goto err_exit;
+		if (!rq->q->mq_ops) {
+			struct scsi_data_buffer *bidi_sdb =
+				kmem_cache_zalloc(scsi_sdb_cache, GFP_ATOMIC);
+			if (!bidi_sdb) {
+				error = BLKPREP_DEFER;
+				goto err_exit;
+			}
+
+			rq->next_rq->special = bidi_sdb;
 		}
 
-		rq->next_rq->special = bidi_sdb;
-		error = scsi_init_sgtable(rq->next_rq, bidi_sdb, GFP_ATOMIC);
+		error = scsi_init_sgtable(rq->next_rq, rq->next_rq->special,
+					  GFP_ATOMIC);
 		if (error)
 			goto err_exit;
 	}
@@ -1035,7 +1141,7 @@ int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask)
 		BUG_ON(prot_sdb == NULL);
 		ivecs = blk_rq_count_integrity_sg(rq->q, rq->bio);
 
-		if (scsi_alloc_sgtable(prot_sdb, ivecs, gfp_mask)) {
+		if (scsi_alloc_sgtable(prot_sdb, ivecs, gfp_mask, is_mq)) {
 			error = BLKPREP_DEFER;
 			goto err_exit;
 		}
@@ -1049,13 +1155,16 @@ int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask)
 		cmd->prot_sdb->table.nents = count;
 	}
 
-	return BLKPREP_OK ;
-
+	return BLKPREP_OK;
 err_exit:
-	scsi_release_buffers(cmd);
-	cmd->request->special = NULL;
-	scsi_put_command(cmd);
-	put_device(&sdev->sdev_gendev);
+	if (is_mq) {
+		scsi_mq_free_sgtables(cmd);
+	} else {
+		scsi_release_buffers(cmd);
+		cmd->request->special = NULL;
+		scsi_put_command(cmd);
+		put_device(&sdev->sdev_gendev);
+	}
 	return error;
 }
 EXPORT_SYMBOL(scsi_init_io);
@@ -1266,13 +1375,7 @@ out:
 
 static void scsi_unprep_fn(struct request_queue *q, struct request *req)
 {
-	if (req->cmd_type == REQ_TYPE_FS) {
-		struct scsi_cmnd *cmd = req->special;
-		struct scsi_driver *drv = scsi_cmd_to_driver(cmd);
-
-		if (drv->uninit_command)
-			drv->uninit_command(cmd);
-	}
+	scsi_uninit_cmd(req->special);
 }
 
 /*
@@ -1295,7 +1398,11 @@ static inline int scsi_dev_queue_ready(struct request_queue *q,
 	 * unblock after device_blocked iterates to zero
 	 */
 	if (atomic_dec_return(&sdev->device_blocked) > 0) {
-		blk_delay_queue(q, SCSI_QUEUE_DELAY);
+		/*
+		 * For the MQ case we take care of this in the caller.
+		 */
+		if (!q->mq_ops)
+			blk_delay_queue(q, SCSI_QUEUE_DELAY);
 		goto out_dec;
 	}
 	SCSI_LOG_MLQUEUE(3, sdev_printk(KERN_INFO, sdev,
@@ -1671,6 +1778,180 @@ out_delay:
 	blk_delay_queue(q, SCSI_QUEUE_DELAY);
 }
 
+static inline int prep_to_mq(int ret)
+{
+	switch (ret) {
+	case BLKPREP_OK:
+		return 0;
+	case BLKPREP_DEFER:
+		return BLK_MQ_RQ_QUEUE_BUSY;
+	default:
+		return BLK_MQ_RQ_QUEUE_ERROR;
+	}
+}
+
+static int scsi_mq_prep_fn(struct request *req)
+{
+	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
+	struct scsi_device *sdev = req->q->queuedata;
+	struct Scsi_Host *shost = sdev->host;
+	unsigned char *sense_buf = cmd->sense_buffer;
+	struct scatterlist *sg;
+
+	memset(cmd, 0, sizeof(struct scsi_cmnd));
+
+	req->special = cmd;
+
+	cmd->request = req;
+	cmd->device = sdev;
+	cmd->sense_buffer = sense_buf;
+
+	cmd->tag = req->tag;
+
+	req->cmd = req->__cmd;
+	cmd->cmnd = req->cmd;
+	cmd->prot_op = SCSI_PROT_NORMAL;
+
+	INIT_LIST_HEAD(&cmd->list);
+	INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler);
+	cmd->jiffies_at_alloc = jiffies;
+
+	/*
+	 * XXX: cmd_list lookups are only used by two drivers, try to get
+	 * rid of this list in common code.
+	 */
+	spin_lock_irq(&sdev->list_lock);
+	list_add_tail(&cmd->list, &sdev->cmd_list);
+	spin_unlock_irq(&sdev->list_lock);
+
+	sg = (void *)cmd + sizeof(struct scsi_cmnd) + shost->hostt->cmd_size;
+	cmd->sdb.table.sgl = sg;
+
+	if (scsi_host_get_prot(shost)) {
+		cmd->prot_sdb = (void *)sg +
+			shost->sg_tablesize * sizeof(struct scatterlist);
+		memset(cmd->prot_sdb, 0, sizeof(struct scsi_data_buffer));
+
+		cmd->prot_sdb->table.sgl =
+			(struct scatterlist *)(cmd->prot_sdb + 1);
+	}
+
+	if (blk_bidi_rq(req)) {
+		struct request *next_rq = req->next_rq;
+		struct scsi_data_buffer *bidi_sdb = blk_mq_rq_to_pdu(next_rq);
+
+		memset(bidi_sdb, 0, sizeof(struct scsi_data_buffer));
+		bidi_sdb->table.sgl =
+			(struct scatterlist *)(bidi_sdb + 1);
+
+		next_rq->special = bidi_sdb;
+	}
+
+	return scsi_setup_cmnd(sdev, req);
+}
+
+static void scsi_mq_done(struct scsi_cmnd *cmd)
+{
+	trace_scsi_dispatch_cmd_done(cmd);
+	blk_mq_complete_request(cmd->request);
+}
+
+static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
+{
+	struct request_queue *q = req->q;
+	struct scsi_device *sdev = q->queuedata;
+	struct Scsi_Host *shost = sdev->host;
+	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
+	int ret;
+	int reason;
+
+	ret = prep_to_mq(scsi_prep_state_check(sdev, req));
+	if (ret)
+		goto out;
+
+	ret = BLK_MQ_RQ_QUEUE_BUSY;
+	if (!get_device(&sdev->sdev_gendev))
+		goto out;
+
+	if (!scsi_dev_queue_ready(q, sdev))
+		goto out_put_device;
+	if (!scsi_target_queue_ready(shost, sdev))
+		goto out_dec_device_busy;
+	if (!scsi_host_queue_ready(q, shost, sdev))
+		goto out_dec_target_busy;
+
+	if (!(req->cmd_flags & REQ_DONTPREP)) {
+		ret = prep_to_mq(scsi_mq_prep_fn(req));
+		if (ret)
+			goto out_dec_host_busy;
+		req->cmd_flags |= REQ_DONTPREP;
+	}
+
+	scsi_init_cmd_errh(cmd);
+	cmd->scsi_done = scsi_mq_done;
+
+	reason = scsi_dispatch_cmd(cmd);
+	if (reason) {
+		scsi_set_blocked(cmd, reason);
+		ret = BLK_MQ_RQ_QUEUE_BUSY;
+		goto out_dec_host_busy;
+	}
+
+	return BLK_MQ_RQ_QUEUE_OK;
+
+out_dec_host_busy:
+	atomic_dec(&shost->host_busy);
+out_dec_target_busy:
+	if (scsi_target(sdev)->can_queue > 0)
+		atomic_dec(&scsi_target(sdev)->target_busy);
+out_dec_device_busy:
+	atomic_dec(&sdev->device_busy);
+out_put_device:
+	put_device(&sdev->sdev_gendev);
+out:
+	switch (ret) {
+	case BLK_MQ_RQ_QUEUE_BUSY:
+		blk_mq_stop_hw_queue(hctx);
+		if (atomic_read(&sdev->device_busy) == 0 &&
+		    !scsi_device_blocked(sdev))
+			blk_mq_delay_queue(hctx, SCSI_QUEUE_DELAY);
+		break;
+	case BLK_MQ_RQ_QUEUE_ERROR:
+		/*
+		 * Make sure to release all allocated resources when
+		 * we hit an error, as we will never see this command
+		 * again.
+		 */
+		if (req->cmd_flags & REQ_DONTPREP)
+			scsi_mq_uninit_cmd(cmd);
+		break;
+	default:
+		break;
+	}
+	return ret;
+}
+
+static int scsi_init_request(void *data, struct request *rq,
+		unsigned int hctx_idx, unsigned int request_idx,
+		unsigned int numa_node)
+{
+	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
+
+	cmd->sense_buffer = kzalloc_node(SCSI_SENSE_BUFFERSIZE, GFP_KERNEL,
+			numa_node);
+	if (!cmd->sense_buffer)
+		return -ENOMEM;
+	return 0;
+}
+
+static void scsi_exit_request(void *data, struct request *rq,
+		unsigned int hctx_idx, unsigned int request_idx)
+{
+	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
+
+	kfree(cmd->sense_buffer);
+}
+
 static u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost)
 {
 	struct device *host_dev;
@@ -1692,16 +1973,10 @@ static u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost)
 	return bounce_limit;
 }
 
-struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
-					 request_fn_proc *request_fn)
+static void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
 {
-	struct request_queue *q;
 	struct device *dev = shost->dma_dev;
 
-	q = blk_init_queue(request_fn, NULL);
-	if (!q)
-		return NULL;
-
 	/*
 	 * this limit is imposed by hardware restrictions
 	 */
@@ -1732,7 +2007,17 @@ struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
 	 * blk_queue_update_dma_alignment() later.
 	 */
 	blk_queue_dma_alignment(q, 0x03);
+}
 
+struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost,
+					 request_fn_proc *request_fn)
+{
+	struct request_queue *q;
+
+	q = blk_init_queue(request_fn, NULL);
+	if (!q)
+		return NULL;
+	__scsi_init_queue(shost, q);
 	return q;
 }
 EXPORT_SYMBOL(__scsi_alloc_queue);
@@ -1753,6 +2038,55 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
 	return q;
 }
 
+static struct blk_mq_ops scsi_mq_ops = {
+	.map_queue	= blk_mq_map_queue,
+	.queue_rq	= scsi_queue_rq,
+	.complete	= scsi_softirq_done,
+	.timeout	= scsi_times_out,
+	.init_request	= scsi_init_request,
+	.exit_request	= scsi_exit_request,
+};
+
+struct request_queue *scsi_mq_alloc_queue(struct scsi_device *sdev)
+{
+	sdev->request_queue = blk_mq_init_queue(&sdev->host->tag_set);
+	if (IS_ERR(sdev->request_queue))
+		return NULL;
+
+	sdev->request_queue->queuedata = sdev;
+	__scsi_init_queue(sdev->host, sdev->request_queue);
+	return sdev->request_queue;
+}
+
+int scsi_mq_setup_tags(struct Scsi_Host *shost)
+{
+	unsigned int cmd_size, sgl_size, tbl_size;
+
+	tbl_size = shost->sg_tablesize;
+	if (tbl_size > SCSI_MAX_SG_SEGMENTS)
+		tbl_size = SCSI_MAX_SG_SEGMENTS;
+	sgl_size = tbl_size * sizeof(struct scatterlist);
+	cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size + sgl_size;
+	if (scsi_host_get_prot(shost))
+		cmd_size += sizeof(struct scsi_data_buffer) + sgl_size;
+
+	memset(&shost->tag_set, 0, sizeof(shost->tag_set));
+	shost->tag_set.ops = &scsi_mq_ops;
+	shost->tag_set.nr_hw_queues = 1;
+	shost->tag_set.queue_depth = shost->can_queue;
+	shost->tag_set.cmd_size = cmd_size;
+	shost->tag_set.numa_node = NUMA_NO_NODE;
+	shost->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+	shost->tag_set.driver_data = shost;
+
+	return blk_mq_alloc_tag_set(&shost->tag_set);
+}
+
+void scsi_mq_destroy_tags(struct Scsi_Host *shost)
+{
+	blk_mq_free_tag_set(&shost->tag_set);
+}
+
 /*
  * Function: scsi_block_requests()
  *
@@ -2498,9 +2832,13 @@ scsi_internal_device_block(struct scsi_device *sdev)
 	 * block layer from calling the midlayer with this device's
 	 * request queue.
 	 */
-	spin_lock_irqsave(q->queue_lock, flags);
-	blk_stop_queue(q);
-	spin_unlock_irqrestore(q->queue_lock, flags);
+	if (q->mq_ops) {
+		blk_mq_stop_hw_queues(q);
+	} else {
+		spin_lock_irqsave(q->queue_lock, flags);
+		blk_stop_queue(q);
+		spin_unlock_irqrestore(q->queue_lock, flags);
+	}
 
 	return 0;
 }
@@ -2546,9 +2884,13 @@ scsi_internal_device_unblock(struct scsi_device *sdev,
 	    sdev->sdev_state != SDEV_OFFLINE)
 		return -EINVAL;
 
-	spin_lock_irqsave(q->queue_lock, flags);
-	blk_start_queue(q);
-	spin_unlock_irqrestore(q->queue_lock, flags);
+	if (q->mq_ops) {
+		blk_mq_start_stopped_hw_queues(q, false);
+	} else {
+		spin_lock_irqsave(q->queue_lock, flags);
+		blk_start_queue(q);
+		spin_unlock_irqrestore(q->queue_lock, flags);
+	}
 
 	return 0;
 }
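[Editor's note: for orientation, the new entry points above (scsi_mq_setup_tags(), scsi_mq_alloc_queue(), scsi_mq_destroy_tags()) are driven from host registration and device scanning code that lives outside this file. A hedged sketch of how such callers might look; the example_* function names are hypothetical and the actual wiring in hosts.c and scsi_scan.c is not shown in this diff:]

	#include <scsi/scsi_device.h>
	#include <scsi/scsi_host.h>

	/* Hypothetical caller: set up the host-wide shared tag set at host add
	 * time; every tag carries cmd_size bytes of preallocated command data. */
	static int example_add_host(struct Scsi_Host *shost)
	{
		if (shost->use_blk_mq)
			return scsi_mq_setup_tags(shost);
		return 0; /* the legacy path allocates its queues per LUN later */
	}

	/* Hypothetical caller: each scanned LUN gets its own request_queue, but
	 * with blk-mq all of them share the host's tag_set created above. */
	static struct request_queue *example_alloc_queue(struct scsi_device *sdev)
	{
		if (sdev->host->use_blk_mq)
			return scsi_mq_alloc_queue(sdev);
		return scsi_alloc_queue(sdev);
	}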