diff options
author | Ming Lei <ming.lei@canonical.com> | 2014-06-26 05:41:48 -0400 |
---|---|---|
committer | Jens Axboe <axboe@fb.com> | 2014-07-01 12:51:03 -0400 |
commit | 6a27b656fc0210e976db362e1368c56db05c8f08 (patch) | |
tree | 063dfdcbeb4c24e1c100e8a5bfd1bd99cbdd7d10 | |
parent | cb553215d5d277d4838d7d6b7722e964bcf5ca1f (diff) |
block: virtio-blk: support multi virt queues per virtio-blk device
Firstly this patch supports more than one virtual queue for a virtio-blk
device.
Secondly this patch maps the virtual queue to blk-mq's hardware queue.
With this approach, both scalability and performance can be improved.
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
-rw-r--r-- | drivers/block/virtio_blk.c | 104 |
1 file changed, 84 insertions, 20 deletions
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index f63d358f3d93..0a581400de0f 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c | |||
@@ -15,17 +15,22 @@ | |||
15 | #include <linux/numa.h> | 15 | #include <linux/numa.h> |
16 | 16 | ||
17 | #define PART_BITS 4 | 17 | #define PART_BITS 4 |
18 | #define VQ_NAME_LEN 16 | ||
18 | 19 | ||
19 | static int major; | 20 | static int major; |
20 | static DEFINE_IDA(vd_index_ida); | 21 | static DEFINE_IDA(vd_index_ida); |
21 | 22 | ||
22 | static struct workqueue_struct *virtblk_wq; | 23 | static struct workqueue_struct *virtblk_wq; |
23 | 24 | ||
25 | struct virtio_blk_vq { | ||
26 | struct virtqueue *vq; | ||
27 | spinlock_t lock; | ||
28 | char name[VQ_NAME_LEN]; | ||
29 | } ____cacheline_aligned_in_smp; | ||
30 | |||
24 | struct virtio_blk | 31 | struct virtio_blk |
25 | { | 32 | { |
26 | struct virtio_device *vdev; | 33 | struct virtio_device *vdev; |
27 | struct virtqueue *vq; | ||
28 | spinlock_t vq_lock; | ||
29 | 34 | ||
30 | /* The disk structure for the kernel. */ | 35 | /* The disk structure for the kernel. */ |
31 | struct gendisk *disk; | 36 | struct gendisk *disk; |
@@ -47,6 +52,10 @@ struct virtio_blk | |||
47 | 52 | ||
48 | /* Ida index - used to track minor number allocations. */ | 53 | /* Ida index - used to track minor number allocations. */ |
49 | int index; | 54 | int index; |
55 | |||
56 | /* num of vqs */ | ||
57 | int num_vqs; | ||
58 | struct virtio_blk_vq *vqs; | ||
50 | }; | 59 | }; |
51 | 60 | ||
52 | struct virtblk_req | 61 | struct virtblk_req |
@@ -133,14 +142,15 @@ static void virtblk_done(struct virtqueue *vq) | |||
133 | { | 142 | { |
134 | struct virtio_blk *vblk = vq->vdev->priv; | 143 | struct virtio_blk *vblk = vq->vdev->priv; |
135 | bool req_done = false; | 144 | bool req_done = false; |
145 | int qid = vq->index; | ||
136 | struct virtblk_req *vbr; | 146 | struct virtblk_req *vbr; |
137 | unsigned long flags; | 147 | unsigned long flags; |
138 | unsigned int len; | 148 | unsigned int len; |
139 | 149 | ||
140 | spin_lock_irqsave(&vblk->vq_lock, flags); | 150 | spin_lock_irqsave(&vblk->vqs[qid].lock, flags); |
141 | do { | 151 | do { |
142 | virtqueue_disable_cb(vq); | 152 | virtqueue_disable_cb(vq); |
143 | while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { | 153 | while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { |
144 | blk_mq_complete_request(vbr->req); | 154 | blk_mq_complete_request(vbr->req); |
145 | req_done = true; | 155 | req_done = true; |
146 | } | 156 | } |
@@ -151,7 +161,7 @@ static void virtblk_done(struct virtqueue *vq) | |||
151 | /* In case queue is stopped waiting for more buffers. */ | 161 | /* In case queue is stopped waiting for more buffers. */ |
152 | if (req_done) | 162 | if (req_done) |
153 | blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); | 163 | blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); |
154 | spin_unlock_irqrestore(&vblk->vq_lock, flags); | 164 | spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); |
155 | } | 165 | } |
156 | 166 | ||
157 | static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) | 167 | static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) |
@@ -160,6 +170,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) | |||
160 | struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); | 170 | struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); |
161 | unsigned long flags; | 171 | unsigned long flags; |
162 | unsigned int num; | 172 | unsigned int num; |
173 | int qid = hctx->queue_num; | ||
163 | const bool last = (req->cmd_flags & REQ_END) != 0; | 174 | const bool last = (req->cmd_flags & REQ_END) != 0; |
164 | int err; | 175 | int err; |
165 | bool notify = false; | 176 | bool notify = false; |
@@ -202,12 +213,12 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) | |||
202 | vbr->out_hdr.type |= VIRTIO_BLK_T_IN; | 213 | vbr->out_hdr.type |= VIRTIO_BLK_T_IN; |
203 | } | 214 | } |
204 | 215 | ||
205 | spin_lock_irqsave(&vblk->vq_lock, flags); | 216 | spin_lock_irqsave(&vblk->vqs[qid].lock, flags); |
206 | err = __virtblk_add_req(vblk->vq, vbr, vbr->sg, num); | 217 | err = __virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); |
207 | if (err) { | 218 | if (err) { |
208 | virtqueue_kick(vblk->vq); | 219 | virtqueue_kick(vblk->vqs[qid].vq); |
209 | blk_mq_stop_hw_queue(hctx); | 220 | blk_mq_stop_hw_queue(hctx); |
210 | spin_unlock_irqrestore(&vblk->vq_lock, flags); | 221 | spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); |
211 | /* Out of mem doesn't actually happen, since we fall back | 222 | /* Out of mem doesn't actually happen, since we fall back |
212 | * to direct descriptors */ | 223 | * to direct descriptors */ |
213 | if (err == -ENOMEM || err == -ENOSPC) | 224 | if (err == -ENOMEM || err == -ENOSPC) |
@@ -215,12 +226,12 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) | |||
215 | return BLK_MQ_RQ_QUEUE_ERROR; | 226 | return BLK_MQ_RQ_QUEUE_ERROR; |
216 | } | 227 | } |
217 | 228 | ||
218 | if (last && virtqueue_kick_prepare(vblk->vq)) | 229 | if (last && virtqueue_kick_prepare(vblk->vqs[qid].vq)) |
219 | notify = true; | 230 | notify = true; |
220 | spin_unlock_irqrestore(&vblk->vq_lock, flags); | 231 | spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); |
221 | 232 | ||
222 | if (notify) | 233 | if (notify) |
223 | virtqueue_notify(vblk->vq); | 234 | virtqueue_notify(vblk->vqs[qid].vq); |
224 | return BLK_MQ_RQ_QUEUE_OK; | 235 | return BLK_MQ_RQ_QUEUE_OK; |
225 | } | 236 | } |
226 | 237 | ||
@@ -377,12 +388,64 @@ static void virtblk_config_changed(struct virtio_device *vdev) | |||
377 | static int init_vq(struct virtio_blk *vblk) | 388 | static int init_vq(struct virtio_blk *vblk) |
378 | { | 389 | { |
379 | int err = 0; | 390 | int err = 0; |
391 | int i; | ||
392 | vq_callback_t **callbacks; | ||
393 | const char **names; | ||
394 | struct virtqueue **vqs; | ||
395 | unsigned short num_vqs; | ||
396 | struct virtio_device *vdev = vblk->vdev; | ||
397 | |||
398 | err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ, | ||
399 | struct virtio_blk_config, num_queues, | ||
400 | &num_vqs); | ||
401 | if (err) | ||
402 | num_vqs = 1; | ||
403 | |||
404 | vblk->vqs = kmalloc(sizeof(*vblk->vqs) * num_vqs, GFP_KERNEL); | ||
405 | if (!vblk->vqs) { | ||
406 | err = -ENOMEM; | ||
407 | goto out; | ||
408 | } | ||
409 | |||
410 | names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL); | ||
411 | if (!names) | ||
412 | goto err_names; | ||
413 | |||
414 | callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL); | ||
415 | if (!callbacks) | ||
416 | goto err_callbacks; | ||
417 | |||
418 | vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL); | ||
419 | if (!vqs) | ||
420 | goto err_vqs; | ||
380 | 421 | ||
381 | /* We expect one virtqueue, for output. */ | 422 | for (i = 0; i < num_vqs; i++) { |
382 | vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests"); | 423 | callbacks[i] = virtblk_done; |
383 | if (IS_ERR(vblk->vq)) | 424 | snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i); |
384 | err = PTR_ERR(vblk->vq); | 425 | names[i] = vblk->vqs[i].name; |
426 | } | ||
427 | |||
428 | /* Discover virtqueues and write information to configuration. */ | ||
429 | err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names); | ||
430 | if (err) | ||
431 | goto err_find_vqs; | ||
385 | 432 | ||
433 | for (i = 0; i < num_vqs; i++) { | ||
434 | spin_lock_init(&vblk->vqs[i].lock); | ||
435 | vblk->vqs[i].vq = vqs[i]; | ||
436 | } | ||
437 | vblk->num_vqs = num_vqs; | ||
438 | |||
439 | err_find_vqs: | ||
440 | kfree(vqs); | ||
441 | err_vqs: | ||
442 | kfree(callbacks); | ||
443 | err_callbacks: | ||
444 | kfree(names); | ||
445 | err_names: | ||
446 | if (err) | ||
447 | kfree(vblk->vqs); | ||
448 | out: | ||
386 | return err; | 449 | return err; |
387 | } | 450 | } |
388 | 451 | ||
@@ -551,7 +614,6 @@ static int virtblk_probe(struct virtio_device *vdev) | |||
551 | err = init_vq(vblk); | 614 | err = init_vq(vblk); |
552 | if (err) | 615 | if (err) |
553 | goto out_free_vblk; | 616 | goto out_free_vblk; |
554 | spin_lock_init(&vblk->vq_lock); | ||
555 | 617 | ||
556 | /* FIXME: How many partitions? How long is a piece of string? */ | 618 | /* FIXME: How many partitions? How long is a piece of string? */ |
557 | vblk->disk = alloc_disk(1 << PART_BITS); | 619 | vblk->disk = alloc_disk(1 << PART_BITS); |
@@ -562,7 +624,7 @@ static int virtblk_probe(struct virtio_device *vdev) | |||
562 | 624 | ||
563 | /* Default queue sizing is to fill the ring. */ | 625 | /* Default queue sizing is to fill the ring. */ |
564 | if (!virtblk_queue_depth) { | 626 | if (!virtblk_queue_depth) { |
565 | virtblk_queue_depth = vblk->vq->num_free; | 627 | virtblk_queue_depth = vblk->vqs[0].vq->num_free; |
566 | /* ... but without indirect descs, we use 2 descs per req */ | 628 | /* ... but without indirect descs, we use 2 descs per req */ |
567 | if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) | 629 | if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) |
568 | virtblk_queue_depth /= 2; | 630 | virtblk_queue_depth /= 2; |
@@ -570,7 +632,6 @@ static int virtblk_probe(struct virtio_device *vdev) | |||
570 | 632 | ||
571 | memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); | 633 | memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); |
572 | vblk->tag_set.ops = &virtio_mq_ops; | 634 | vblk->tag_set.ops = &virtio_mq_ops; |
573 | vblk->tag_set.nr_hw_queues = 1; | ||
574 | vblk->tag_set.queue_depth = virtblk_queue_depth; | 635 | vblk->tag_set.queue_depth = virtblk_queue_depth; |
575 | vblk->tag_set.numa_node = NUMA_NO_NODE; | 636 | vblk->tag_set.numa_node = NUMA_NO_NODE; |
576 | vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; | 637 | vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; |
@@ -578,6 +639,7 @@ static int virtblk_probe(struct virtio_device *vdev) | |||
578 | sizeof(struct virtblk_req) + | 639 | sizeof(struct virtblk_req) + |
579 | sizeof(struct scatterlist) * sg_elems; | 640 | sizeof(struct scatterlist) * sg_elems; |
580 | vblk->tag_set.driver_data = vblk; | 641 | vblk->tag_set.driver_data = vblk; |
642 | vblk->tag_set.nr_hw_queues = vblk->num_vqs; | ||
581 | 643 | ||
582 | err = blk_mq_alloc_tag_set(&vblk->tag_set); | 644 | err = blk_mq_alloc_tag_set(&vblk->tag_set); |
583 | if (err) | 645 | if (err) |
@@ -727,6 +789,7 @@ static void virtblk_remove(struct virtio_device *vdev) | |||
727 | refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount); | 789 | refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount); |
728 | put_disk(vblk->disk); | 790 | put_disk(vblk->disk); |
729 | vdev->config->del_vqs(vdev); | 791 | vdev->config->del_vqs(vdev); |
792 | kfree(vblk->vqs); | ||
730 | kfree(vblk); | 793 | kfree(vblk); |
731 | 794 | ||
732 | /* Only free device id if we don't have any users */ | 795 | /* Only free device id if we don't have any users */ |
@@ -777,7 +840,8 @@ static const struct virtio_device_id id_table[] = { | |||
777 | static unsigned int features[] = { | 840 | static unsigned int features[] = { |
778 | VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, | 841 | VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, |
779 | VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI, | 842 | VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI, |
780 | VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE | 843 | VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, |
844 | VIRTIO_BLK_F_MQ, | ||
781 | }; | 845 | }; |
782 | 846 | ||
783 | static struct virtio_driver virtio_blk = { | 847 | static struct virtio_driver virtio_blk = { |